Lucene学习笔记(二)

简介: 本文通过若干 JUnit 测试示例演示 Lucene 的基本索引操作, 包括索引的创建与读取、文档的删除与更新、索引调优参数的设置, 以及 FSDirectory 与 RAMDirectory 的索引性能对比。
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import junit.framework.TestCase;


public class BaseIndexTestCase extends TestCase 
{
    protected String[] keywords = {"1", "2"}; 
    protected String[] unindexed = {"Netherlands", "Italy"}; 
    protected String[] unstored = {"Amsterdam has lots of bridges", "Venice has lots of canals"}; 
    protected String[] text = {"Amsterdam", "Venice"}; 
    protected Directory dir; 
 
    protected void setUp() throws IOException { 
        String indexDir =  
            System.getProperty("java.io.tmpdir", "tmp")  + 
            System.getProperty("file.separator") + "index-dir"; 
        dir = FSDirectory.getDirectory(indexDir, true); 
        addDocuments(dir); 
    } 
 
    protected void addDocuments(Directory dir) 
        throws IOException { 
        IndexWriter writer = new IndexWriter(dir, getAnalyzer(), true); 
        writer.setUseCompoundFile(isCompound()); 
        for (int i = 0; i < keywords.length; i++) 
        { 
            Document doc = new Document(); 
            doc.add(new Field("id",keywords[i],Field.Store.YES,Field.Index.UN_TOKENIZED));
            doc.add(new Field("country",unindexed[i],Field.Store.YES,Field.Index.NO));
            doc.add(new Field("contents",unstored[i],Field.Store.NO,Field.Index.TOKENIZED));
            doc.add(new Field("city",text[i],Field.Store.YES,Field.Index.TOKENIZED));
            writer.addDocument(doc); 
        } 
        writer.optimize(); 
        writer.close(); 
    } 
 
    protected Analyzer getAnalyzer() 
    { 
        return new SimpleAnalyzer(); 
    } 
    protected boolean isCompound()
    { 
        return true; 
    } 
    
    public void testIndexWriter() throws IOException
    {
        IndexWriter writer = new IndexWriter(dir,this.getAnalyzer(),false);
        assertEquals(keywords.length,writer.docCount());
         writer.close();
    }
    
    public void testIndexReader() throws IOException 
    {
        IndexReader reader = IndexReader.open(dir);
        assertEquals(keywords.length, reader.maxDoc());
        assertEquals(keywords.length, reader.numDocs());
        reader.close();
    }
}

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;


public class DocumentDeleteTest extends BaseIndexTestCase 
{
      public void testDeleteBeforeIndexMerge() throws IOException 
      {
        assertEquals(1, getHitCount("city", "Amsterdam"));

        IndexReader reader = IndexReader.open(dir);
        assertEquals(2, reader.maxDoc());
        assertEquals(2, reader.numDocs());
        

        reader.deleteDocument(1);

        assertTrue(reader.isDeleted(1));
        assertTrue(reader.hasDeletions());
        assertEquals(2, reader.maxDoc());
        assertEquals(1, reader.numDocs());

        reader.close();

        reader = IndexReader.open(dir);

        assertEquals(2, reader.maxDoc());
        assertEquals(1, reader.numDocs());

        reader.close();
      }

      public void testDeleteAfterIndexMerge() throws IOException 
      {
        IndexReader reader = IndexReader.open(dir);
        assertEquals(2, reader.maxDoc());
        assertEquals(2, reader.numDocs());
        reader.deleteDocument(1);
        reader.close();

        IndexWriter writer = new IndexWriter(dir, getAnalyzer(),false);
        writer.optimize();
        writer.close();

        reader = IndexReader.open(dir);

        assertFalse(reader.isDeleted(1));
        assertFalse(reader.hasDeletions());
        assertEquals(1, reader.maxDoc());
        assertEquals(1, reader.numDocs());

        reader.close();
      }
      

      private int getHitCount(String fieldName, String searchString)
        throws IOException {
        IndexSearcher searcher = new IndexSearcher(dir);
        Term t = new Term(fieldName, searchString);
        Query query = new TermQuery(t);
        Hits hits = searcher.search(query);
        int hitCount = hits.length();
        searcher.close();
        return hitCount;
      }
      

      protected Analyzer getAnalyzer() {
        return new WhitespaceAnalyzer();
      }



}


import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;


public class DocumentUpdateTest extends BaseIndexTestCase 
{

      public void testUpdate() throws IOException 
      {
        assertEquals(1, getHitCount("city", "Amsterdam"));
        IndexReader reader = IndexReader.open(dir);
        reader.deleteDocuments(new Term("city", "Amsterdam"));
        reader.close();

        IndexWriter writer = new IndexWriter(dir, getAnalyzer(),
          false);
        Document doc = new Document();
          doc.add(new Field("id","1",Field.Store.YES,Field.Index.UN_TOKENIZED));
          doc.add(new Field("country","Russia",Field.Store.YES,Field.Index.NO));
          doc.add(new Field("contents","St. Petersburg has lots of bridges",Field.Store.NO,Field.Index.TOKENIZED));
          doc.add(new Field("city","St. Petersburg",Field.Store.YES,Field.Index.TOKENIZED));

        writer.addDocument(doc);
        writer.optimize();
        writer.close();

        assertEquals(0, getHitCount("city", "Amsterdam"));
        assertEquals(1, getHitCount("city", "Petersburg"));
      }

      protected Analyzer getAnalyzer() {
        return new WhitespaceAnalyzer();
      }

      private int getHitCount(String fieldName, String searchString)
        throws IOException {
        IndexSearcher searcher = new IndexSearcher(dir);
        Term t = new Term(fieldName, searchString);
        Query query = new TermQuery(t);
        Hits hits = searcher.search(query);
        int hitCount = hits.length();
        searcher.close();
        return hitCount;
      }

}


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;


public class IndexTuningDemo 
{
     public static void main(String[] args) throws Exception {
            int docsInIndex  = Integer.parseInt(args[0]);

            // create an index called 'index-dir' in a temp directory
            Directory dir = FSDirectory.getDirectory(
            System.getProperty("java.io.tmpdir", "tmp") +
            System.getProperty("file.separator") + "index-dir", true);
            Analyzer analyzer = new SimpleAnalyzer();
            IndexWriter writer = new IndexWriter(dir, analyzer, true);

            // set variables that affect speed of indexing
            writer.setMergeFactor(Integer.parseInt(args[1]));
            writer.setMaxMergeDocs(Integer.parseInt(args[2]));
            writer.setInfoStream(System.out);
            writer.setMaxBufferedDocs(Integer.parseInt(args[3]));

            System.out.println("Merge factor:   " + writer.getMergeFactor());
            System.out.println("Max merge docs: " + writer.getMaxMergeDocs());
            System.out.println("Min merge docs: " + writer.getMaxBufferedDocs());

            long start = System.currentTimeMillis();
            for (int i = 0; i < docsInIndex; i++) {
              Document doc = new Document();
              doc.add(new Field("fieldname", "Bibamus", Field.Store.YES,Field.Index.TOKENIZED));
              writer.addDocument(doc);
            }
            writer.close();
            long stop = System.currentTimeMillis();
            System.out.println("Time: " + (stop - start) + " ms");
          }

}


import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.SimpleAnalyzer;

import junit.framework.TestCase;
import java.io.IOException;
import java.util.Collection;
import java.util.ArrayList;
import java.util.Iterator;

public class FSversusRAMDirectoryTest extends TestCase 
{
  private Directory fsDir;
  private Directory ramDir;
  private Collection docs = loadDocuments(3000, 5);//加载数据

  protected void setUp() throws Exception 
  {
    String fsIndexDir = System.getProperty("java.io.tmpdir", "tmp") + System.getProperty("file.separator") + "fs-index";
    ramDir = new RAMDirectory();//内存中目录
    fsDir = FSDirectory.getDirectory(fsIndexDir, true);
  }

  public void testTiming() throws IOException 
  {
    long ramTiming = timeIndexWriter(ramDir);
    long fsTiming = timeIndexWriter(fsDir);

    assertTrue(fsTiming > ramTiming);
    

    System.out.println("RAMDirectory Time: " + (ramTiming) + " ms");
    System.out.println("FSDirectory Time : " + (fsTiming) + " ms");
  }

  private long timeIndexWriter(Directory dir) throws IOException 
  {
    long start = System.currentTimeMillis();
    addDocuments(dir);
    long stop = System.currentTimeMillis();
    return (stop - start);
  }

  private void addDocuments(Directory dir) throws IOException 
  {
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(),true);

    /**
    // change to adjust performance of indexing with FSDirectory
    writer.mergeFactor = writer.mergeFactor;
    writer.maxMergeDocs = writer.maxMergeDocs;
    writer.minMergeDocs = writer.minMergeDocs;
    */

    for (Iterator iter = docs.iterator(); iter.hasNext();) 
    {
      Document doc = new Document();
      String word = (String) iter.next();
      doc.add(new Field("keyword",word,Field.Store.YES,Field.Index.UN_TOKENIZED));
      doc.add(new Field("unindexed",word,Field.Store.YES,Field.Index.NO));
      doc.add(new Field("unstored",word,Field.Store.NO,Field.Index.TOKENIZED));
      doc.add(new Field("text",word,Field.Store.YES,Field.Index.TOKENIZED));
      writer.addDocument(doc);
    }
    writer.optimize();
    writer.close();
  }

  private Collection loadDocuments(int numDocs, int wordsPerDoc) 
  {
    Collection docs = new ArrayList(numDocs);
    for (int i = 0; i < numDocs; i++) 
    {
      StringBuffer doc = new StringBuffer(wordsPerDoc);
      for (int j = 0; j < wordsPerDoc; j++) 
      {
        doc.append("Bibamus ");
      }
      docs.add(doc.toString());
    }
    return docs;
  }
}



本文转自Phinecos(洞庭散人)博客园博客,原文链接:http://www.cnblogs.com/phinecos/archive/2007/08/29/874728.html,如需转载请自行联系原作者
目录
相关文章
|
Kubernetes Cloud Native 持续交付
云原生技术在现代应用开发中的角色与实践
【9月更文挑战第9天】 随着云计算技术的飞速发展,云原生(Cloud Native)已经成为推动企业数字化转型的核心力量。本文将深入探讨云原生的基本概念、关键技术及其在实际开发中的应用案例,旨在为读者提供一条清晰的云原生技术学习路径和应用指南。通过实例分析,我们将揭示云原生如何优化资源管理、提升应用性能及加快部署速度,进而帮助企业构建更加灵活、可靠和高效的软件系统。
|
监控 算法 数据可视化
ERP系统中的生产调度与计划排程解析
【7月更文挑战第25天】 ERP系统中的生产调度与计划排程解析
656 1
|
Java 应用服务中间件 API
java 启动查看jar包加载顺序并设置classpath
java 启动查看jar包加载顺序并设置classpath
948 0
|
存储 安全 API
使用KMS为Apollo配置中心敏感配置加密的最佳实践
使用KMS为Apollo配置中心敏感配置加密的最佳实践
1920 4
|
存储 网络协议 Java
本地MinIO存储服务如何创建Buckets并实现公网访问上传文件
本地MinIO存储服务如何创建Buckets并实现公网访问上传文件
1996 0
|
设计模式 前端开发 Java
一篇文章让使你的Spring Mvc学习入门,还不来了解吗?
一篇文章让使你的Spring Mvc学习入门,还不来了解吗?
139 0
|
网络协议
网络协议之:memcached binary protocol详解
前面讲到了memcached的文本协议,虽然文本协议看起来非常简单,但是对于客户端来说一般还是会选择效率更高的二进制协议。 二进制协议的本质和文本协议是一样的,只是他们的表现方式不同而已。本文将会详细介绍memcached中二进制协议的实现细节。
网络协议之:memcached binary protocol详解
|
弹性计算 Linux 网络安全
使用密钥对登录 Linux 云主机
管理 Linux 云主机最简便的方法是使用密钥对登录,既安全,又省事。本文记录在 Mac OS 下使用密钥对登录 Linux 云主机的设置过程,同样也适用于多数 Linux 客户端。
2655 0