Lucene5学习之创建索引入门示例-阿里云开发者社区

Lucene更新实在太快了，只好紧跟脚步开始学习Lucene5，花了点时间写了一个demo：程序根据用户提供的一个文件夹，读取该文件夹下的所有文件，然后读取文件里的内容写入索引。读取文件部分采用的是最新的NIO 2.0 API，因此JDK必须使用1.7及以上版本。Lucene5开发压缩包请在Lucene官网下载。不多说了，对于码农来说，最直接的就是上代码。

   Java代码  
    
  
package com.yida.framework.lucene5.core;  
  
import java.io.BufferedReader;  
import java.io.IOException;  
import java.io.InputStream;  
import java.io.InputStreamReader;  
import java.nio.charset.StandardCharsets;  
import java.nio.file.FileVisitResult;  
import java.nio.file.Files;  
import java.nio.file.LinkOption;  
import java.nio.file.OpenOption;  
import java.nio.file.Path;  
import java.nio.file.Paths;  
import java.nio.file.SimpleFileVisitor;  
import java.nio.file.attribute.BasicFileAttributes;  
  
import org.apache.lucene.analysis.Analyzer;  
import org.apache.lucene.analysis.standard.StandardAnalyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.document.LongField;  
import org.apache.lucene.document.StringField;  
import org.apache.lucene.document.TextField;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.IndexWriterConfig;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.store.Directory;  
import org.apache.lucene.store.FSDirectory;  
  
/** 
 * 读取硬盘文件，创建索引 
 *  
 * @author Lanxiaowei 
 *  
 */  
@SuppressWarnings({ "unchecked", "unused", "rawtypes" })  
public class IndexFile {  
    public static void main(String[] args) throws IOException {  
        String dirPath = "D:/docPath";  
        String indexPath = "D:/lucenedir";  
        createIndex(dirPath, indexPath);  
    }  
      
    /** 
     * 创建索引 
     * @param dirPath       需要读取的文件所在文件目录 
     * @param indexPath     索引存放目录 
     * @throws IOException 
     */  
    public static void createIndex(String dirPath, String indexPath) throws IOException {  
        createIndex(dirPath, indexPath, false);  
    }  
      
    /** 
     * 创建索引 
     * @param dirPath         需要读取的文件所在文件目录 
     * @param indexPath       索引存放目录 
     * @param createOrAppend  始终重建索引/不存在则追加索引 
     * @throws IOException 
     */  
    public static void createIndex(String dirPath, String indexPath,  
            boolean createOrAppend) throws IOException {  
        long start = System.currentTimeMillis();  
        Directory dir = FSDirectory.open(Paths.get(indexPath, new String[0]));  
        Path docDirPath = Paths.get(dirPath, new String[0]);  
        Analyzer analyzer = new StandardAnalyzer();  
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);  
  
        if (createOrAppend) {  
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);  
        } else {  
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);  
        }  
        IndexWriter writer = new IndexWriter(dir, indexWriterConfig);  
        indexDocs(writer, docDirPath);  
        writer.close();  
        long end = System.currentTimeMillis();  
        System.out.println("Time consumed:" + (end - start) + " ms");  
    }  
  
    /** 
     *  
     * @param writer 
     *            索引写入器 
     * @param path 
     *            文件路径 
     * @throws IOException 
     */  
    public static void indexDocs(final IndexWriter writer, Path path)  
            throws IOException {  
        // 如果是目录，查找目录下的文件  
        if (Files.isDirectory(path, new LinkOption[0])) {  
            System.out.println("directory");  
            Files.walkFileTree(path, new SimpleFileVisitor() {  
                @Override  
                public FileVisitResult visitFile(Object file,  
                        BasicFileAttributes attrs) throws IOException {  
                    Path path = (Path)file;  
                    System.out.println(path.getFileName());  
                    indexDoc(writer, path, attrs.lastModifiedTime().toMillis());  
                    return FileVisitResult.CONTINUE;  
                }  
            });  
        } else {  
            indexDoc(writer, path,  
                    Files.getLastModifiedTime(path, new LinkOption[0])  
                            .toMillis());  
        }  
    }  
  
    /** 
     * 读取文件创建索引 
     *  
     * @param writer 
     *            索引写入器 
     * @param file 
     *            文件路径 
     * @param lastModified 
     *            文件最后一次修改时间 
     * @throws IOException 
     */  
    public static void indexDoc(IndexWriter writer, Path file, long lastModified)  
            throws IOException {  
        InputStream stream = Files.newInputStream(file, new OpenOption[0]);  
        Document doc = new Document();  
  
        Field pathField = new StringField("path", file.toString(),  
                Field.Store.YES);  
        doc.add(pathField);  
  
        doc.add(new LongField("modified", lastModified, Field.Store.NO));  
        doc.add(new TextField("contents", new BufferedReader(  
                new InputStreamReader(stream, StandardCharsets.UTF_8))));  
  
        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {  
            System.out.println("adding " + file);  
            writer.addDocument(doc);  
        } else {  
            System.out.println("updating " + file);  
            writer.updateDocument(new Term("path", file.toString()), doc);  
        }  
        writer.commit();  
    }  
}  

项目采用的是Maven构建，怎么创建Maven Project就不用介绍了吧，我就贴下pom配置吧。

   Xml代码  
    
  
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.yida.framework</groupId>
    <artifactId>lucene5</artifactId>
    <packaging>war</packaging>
    <version>1.0</version>
    <name>lucene5 Maven Webapp</name>
    <url>http://maven.apache.org</url>

    <properties>
        <lucene.version>5.0.0</lucene.version>
        <!-- The demo uses the NIO.2 file API, so pin the compiler to Java 7
             instead of relying on the compiler plugin's default level. -->
        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
        <!-- Fix the source encoding so the build does not depend on the
             platform default charset. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
        <!-- All Lucene modules share one version via ${lucene.version}. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>${lucene.version}</version>
        </dependency>
    </dependencies>
    <build>
        <finalName>lucene5</finalName>
    </build>
</project>