Lucene相关Maven依赖
<!-- Lucene core library --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>6.6.0</version> </dependency> <!-- Lucene analyzers: ships the standard, English-oriented analyzers; no Chinese analyzer is included --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-common</artifactId> <version>6.6.0</version> </dependency> <!-- Query parser --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queryparser</artifactId> <version>6.6.0</version> </dependency> <!-- Additional query types --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queries</artifactId> <version>6.6.0</version> </dependency> <!-- Keyword highlighting --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-highlighter</artifactId> <version>6.6.0</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-demo</artifactId> <version>6.6.0</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.12</version> </dependency> <!-- NOTE(review): the test class imports org.wltea.analyzer.lucene.IKAnalyzer, which none of the artifacts listed here provides; an IK Analyzer dependency has to be added separately (coordinates are not shown in this document; confirm which build is compatible with Lucene 6.6.0). -->
Lucene测试用例
- 实体对象
import org.apache.lucene.document.*; import org.apache.lucene.document.Field.Store; public class Article { /** * 主键 */ private Long id; /** * 标题 */ private String title; /** * 内容 */ private String content; /** * 作者 */ private String author; /** * 链接 */ private String url; public Article() { } public Article(Long id, String title, String content, String author, String url) { super(); this.id = id; this.title = title; this.content = content; this.author = author; this.url = url; } public Long getId() { return id; } public void setId(Long id) { this.id = id; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } public String getAuthor() { return author; } public void setAuthor(String author) { this.author = author; } public String getUrl() { return url; } public void setUrl(String url) { this.url = url; } /** * 生成Lucene存储的格式 */ public Document toDocument() { //Lucene存储的格式(Map装的k,v) Document doc = new Document(); //向文档中添加一个long类型的属性,建立索引 doc.add(new LongPoint("id", id)); //在文档中存储 doc.add(new StoredField("id", id)); //设置一个文本类型,会对内容进行分词,建立索引,并将内容在文档中存储 doc.add(new TextField("title", title, Store.YES)); //设置一个文本类型,会对内容进行分词,建立索引,存在文档中存储 / No代表不存储 doc.add(new TextField("content", content, Store.YES)); //StringField,不分词,建立索引,文档中存储 doc.add(new StringField("author", author, Store.YES)); //不分词,不建立索引,在文档中存储, doc.add(new StoredField("url", url)); return doc; } /** * 解析Lucene存储的格式 */ public static Article parseArticle(Document doc) { Long id = Long.parseLong(doc.get("id")); String title = doc.get("title"); String content = doc.get("content"); String author = doc.get("author"); String url = doc.get("url"); Article article = new Article(id, title, content, author, url); return article; } @Override public String toString() { return "id : " + id + " , title : " + title + " , content : " + content + " , author : " + author + 
" , url : " + url; } }
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.*; import org.apache.lucene.store.FSDirectory; import org.junit.Test; import org.wltea.analyzer.lucene.IKAnalyzer; import java.io.IOException; import java.nio.file.Paths; /** * @Title: Lucene增删改查 * @ClassName: cn.edu360.lucene.LuceneCRUDTest.java * @Description: https://segmentfault.com/a/1190000010367206 * https://github.com/DmitryKey/luke/releases * @Copyright 2016-2018 - Powered By 研发中心 * @author: 王延飞 */ public class LuceneCRUDTest { /** * @Title: 写入数据 * @methodName: testCreate * @Description: * @author: 王延飞 */ @Test public void testCreate() throws IOException { Article article = new Article(); article.setId(108L); article.setAuthor("FLY"); article.setTitle("学习大数据"); article.setContent("学数据,像毕老师一样牛!"); article.setUrl("https://blog.csdn.net/fly910905/article/details/81190382"); // String indexPath = "D:\\usr\\lucene\\index"; String indexPath = "D:\\usr\\lucene\\index"; FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath)); //创建一个标准分词器,一个字分一次 //Analyzer analyzer = new StandardAnalyzer(); Analyzer analyzer = new IKAnalyzer(true); //写入索引的配置,设置了分词器 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); //指定了写入数据目录和配置 IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig); //创建一个文档对象 Document document = article.toDocument(); System.out.println(document); 
//通过IndexWriter写入 indexWriter.addDocument(document); indexWriter.close(); } /** * @Title: 查询数据 * @methodName: testSearch * @Description: * @author: 王延飞 * @date: 2018-08-05 16:01 */ @Test public void testSearch() throws IOException, ParseException { String indexPath = "D:\\usr\\lucene\\index"; //Analyzer analyzer = new StandardAnalyzer(); Analyzer analyzer = new IKAnalyzer(true); DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); //索引查询器 IndexSearcher indexSearcher = new IndexSearcher(directoryReader); String queryStr = "数据"; //创建一个查询条件解析器 QueryParser parser = new QueryParser("content", analyzer); //对查询条件进行解析 Query query = parser.parse(queryStr); //TermQuery将查询条件当成是一个固定的词 //Query query = new TermQuery(new Term("url", "http://www.edu360.cn/a10010")); //在【索引】中进行查找 TopDocs topDocs = indexSearcher.search(query, 10); //获取到查找到的文文档ID和得分 ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { //从索引中查询到文档的ID, int doc = scoreDoc.doc; //在根据ID到文档中查找文档内容 Document document = indexSearcher.doc(doc); //将文档转换成对应的实体类 Article article = Article.parseArticle(document); System.out.println(article); } directoryReader.close(); } /** * @Title: 删除数据 * @methodName: testDelete * @Description: * @author: 王延飞 * @date: 2018-08-05 16:01 */ @Test public void testDelete() throws IOException, ParseException { String indexPath = "D:\\usr\\lucene\\index"; Analyzer analyzer = new IKAnalyzer(true); FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath)); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig); //Term词条查找,内容必须完全匹配,不分词 //indexWriter.deleteDocuments(new Term("content", "学好")); // 分词器查找 //QueryParser parser = new QueryParser("title", analyzer); //Query query = parser.parse("大数据老师"); //LongPoint是建立索引的 // LongPoint.newRangeQuery()--范围查询 //Query query = LongPoint.newRangeQuery("id", 99L, 120L); // LongPoint.newExactQuery()--精确查询 
Query query = LongPoint.newExactQuery("id", 105L); indexWriter.deleteDocuments(query); indexWriter.commit(); indexWriter.close(); } /** * @Title: 更新数据 * @methodName: testUpdate * @Description: lucene的update比较特殊,update的代价太高,先删除,然后在插入 * @author: 王延飞 */ @Test public void testUpdate() throws IOException, ParseException { String indexPath = "D:\\usr\\lucene\\index"; StandardAnalyzer analyzer = new StandardAnalyzer(); FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath)); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig); Article article = new Article(); article.setId(106L); article.setAuthor("老王"); article.setTitle("学好大数据,要找赵老师"); article.setContent("迎娶白富美,走上人生巅峰!!!"); article.setUrl("https://blog.csdn.net/fly910905/article/details/81190382"); Document document = article.toDocument(); indexWriter.updateDocument(new Term("author", "老王"), document); indexWriter.commit(); indexWriter.close(); } /** * @Title: 查询多个字段 * @methodName: testMultiField * @Description: * @author: 王延飞 */ @Test public void testMultiField() throws IOException, ParseException { String indexPath = "D:\\usr\\lucene\\index"; Analyzer analyzer = new IKAnalyzer(true); DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher indexSearcher = new IndexSearcher(directoryReader); String[] fields = {"title", "content"}; //多字段的查询转换器 MultiFieldQueryParser queryParser = new MultiFieldQueryParser(fields, analyzer); Query query = queryParser.parse("老师"); TopDocs topDocs = indexSearcher.search(query, 10); ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { int doc = scoreDoc.doc; Document document = indexSearcher.doc(doc); Article article = Article.parseArticle(document); System.out.println(article); } directoryReader.close(); } /** * @Title: 查找全部的数据 * @methodName: testMatchAll * @Description: * @author: 王延飞 */ @Test public void 
testMatchAll() throws IOException, ParseException { String indexPath = "D:\\usr\\lucene\\index"; DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher indexSearcher = new IndexSearcher(directoryReader); Query query = new MatchAllDocsQuery(); TopDocs topDocs = indexSearcher.search(query, 10); ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { int doc = scoreDoc.doc; Document document = indexSearcher.doc(doc); Article article = Article.parseArticle(document); System.out.println(article); } directoryReader.close(); } /** * @Title: 布尔查询,可以组合多个查询条件 * @methodName: testBooleanQuery * @Description: * @author: 王延飞 */ @Test public void testBooleanQuery() throws Exception { String indexPath = "D:\\usr\\lucene\\index"; DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher indexSearcher = new IndexSearcher(directoryReader); Query query1 = new TermQuery(new Term("title", "老师")); Query query2 = new TermQuery(new Term("content", "丁")); BooleanClause bc1 = new BooleanClause(query1, BooleanClause.Occur.MUST); BooleanClause bc2 = new BooleanClause(query2, BooleanClause.Occur.MUST_NOT); BooleanQuery boolQuery = new BooleanQuery.Builder().add(bc1).add(bc2).build(); System.out.println(boolQuery); TopDocs topDocs = indexSearcher.search(boolQuery, 10); ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { int doc = scoreDoc.doc; Document document = indexSearcher.doc(doc); Article article = Article.parseArticle(document); System.out.println(article); } directoryReader.close(); } /** * @Title: QueryParser查询解析 * @methodName: testQueryParser * @Description: * @author: 王延飞 */ @Test public void testQueryParser() throws Exception { String indexPath = "D:\\usr\\lucene\\index"; DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher indexSearcher = new 
IndexSearcher(directoryReader); //创建一个QueryParser对象。参数1:默认搜索域 参数2:分析器对象。 QueryParser queryParser = new QueryParser("title", new IKAnalyzer(true)); //Query query = queryParser.parse("数据"); Query query = queryParser.parse("title:学好 OR title:学习"); System.out.println(query); TopDocs topDocs = indexSearcher.search(query, 10); ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { int doc = scoreDoc.doc; Document document = indexSearcher.doc(doc); Article article = Article.parseArticle(document); System.out.println(article); } directoryReader.close(); } /** * @Title: 范围查询 * @methodName: testRangeQuery * @Description: * @author: 王延飞 */ @Test public void testRangeQuery() throws Exception { String indexPath = "D:\\usr\\lucene\\index"; DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher indexSearcher = new IndexSearcher(directoryReader); Query query = LongPoint.newRangeQuery("id", 107L, 108L); System.out.println(query); TopDocs topDocs = indexSearcher.search(query, 10); ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { int doc = scoreDoc.doc; Document document = indexSearcher.doc(doc); Article article = Article.parseArticle(document); System.out.println(article); } directoryReader.close(); } }