Lucene5学习之SpanQuery跨度查询-阿里云开发者社区

SpanQuery下的子类有好几个，我就放一篇里集中说说。SpanQuery即跨度查询，首先要理解跨度这个概念，Lucene里跨度是用Spans这个类定义的，源码如下：

   Java代码  
    
  
/**
 * Expert: an enumeration of span matches, used to implement span searching.
 *
 * <p>Each span represents a range of term positions within a document. Matches
 * are enumerated in order: by increasing document number, within that by
 * increasing start position, and finally by increasing end position.
 */
public abstract class Spans {
  /**
   * Moves to the next match.
   *
   * @return true iff such a match exists
   */
  public abstract boolean next() throws IOException;

  /**
   * Skips to the first match beyond the current one whose document number is
   * greater than or equal to <i>target</i>.
   *
   * <p>The behavior of this method is <b>undefined</b> when called with
   * <code>target &le; current</code>, or after the iterator has been exhausted.
   * Both cases may result in unpredictable behavior.
   *
   * <p>Behaves as if written:
   * <pre class="prettyprint">
   *   boolean skipTo(int target) {
   *     do {
   *       if (!next())
   *         return false;
   *     } while (target > doc());
   *     return true;
   *   }
   * </pre>
   * Most implementations are considerably more efficient than that.
   *
   * @param target the lowest acceptable document number
   * @return true iff there is such a match
   */
  public abstract boolean skipTo(int target) throws IOException;

  /** Returns the document number of the current match. Initially invalid. */
  public abstract int doc();

  /** Returns the start position of the current match. Initially invalid. */
  public abstract int start();

  /** Returns the end position of the current match. Initially invalid. */
  public abstract int end();

  /**
   * Returns the payload data for the current span.
   *
   * <p>This is invalid until {@link #next()} is called for the first time.
   * This method must not be called more than once after each call of
   * {@link #next()}. However, most payloads are loaded lazily, so if the
   * payload data for the current position is not needed, this method may not
   * be called at all for performance reasons. An ordered SpanQuery does not
   * lazy load, so if you have payloads in your index and you do not want
   * ordered SpanNearQuerys to collect payloads, you can disable collection
   * with a constructor option.
   *
   * <p>Note that the return type is a collection, thus the ordering should
   * not be relied upon.
   *
   * @lucene.experimental
   *
   * @return a collection of byte arrays containing the data of this payload,
   *         otherwise null if isPayloadAvailable is false
   * @throws IOException if there is a low-level I/O error
   */
  // TODO: Remove warning after API has been finalized
  public abstract Collection<byte[]> getPayload() throws IOException;

  /**
   * Checks if a payload can be loaded at this position.
   *
   * <p>Payloads can only be loaded once per call to {@link #next()}.
   *
   * @return true if there is a payload available at this position that can be loaded
   */
  public abstract boolean isPayloadAvailable() throws IOException;

  /**
   * Returns the estimated cost of this spans.
   *
   * <p>This is generally an upper bound of the number of documents this
   * iterator might match, but may be a rough heuristic, hardcoded value, or
   * otherwise completely inaccurate.
   */
  public abstract long cost();
}

跨度里包含了匹配Term的起始位置和结束位置信息以及跨度价值估算值以及payload信息等等。

首先要说的就是SpanTermQuery，它和TermQuery用法很相似，唯一区别就是SpanTermQuery可以得到Term的span跨度信息，用法如下：

   Java代码  
    
  
package com.yida.framework.lucene5.query;  
  
import java.io.IOException;  
  
import org.apache.lucene.analysis.Analyzer;  
import org.apache.lucene.analysis.standard.StandardAnalyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.document.TextField;  
import org.apache.lucene.index.DirectoryReader;  
import org.apache.lucene.index.IndexReader;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.IndexWriterConfig;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.index.IndexWriterConfig.OpenMode;  
import org.apache.lucene.search.AutomatonQuery;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.MultiTermQuery;  
import org.apache.lucene.search.ScoreDoc;  
import org.apache.lucene.search.TopDocs;  
import org.apache.lucene.search.spans.SpanQuery;  
import org.apache.lucene.search.spans.SpanTermQuery;  
import org.apache.lucene.store.Directory;  
import org.apache.lucene.store.RAMDirectory;  
import org.apache.lucene.util.automaton.Automata;  
import org.apache.lucene.util.automaton.Automaton;  
/** 
 * SpanTermQuery用法测试 
 * @author Lanxiaowei 
 * 
 */  
public class SpanTermQueryTest {  
    public static void main(String[] args) throws IOException {  
        Directory dir = new RAMDirectory();  
        Analyzer analyzer = new StandardAnalyzer();  
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);  
        iwc.setOpenMode(OpenMode.CREATE);  
        IndexWriter writer = new IndexWriter(dir, iwc);  
  
        Document doc = new Document();  
        doc.add(new TextField("text", "the quick brown fox jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick red fox jumps over the sleepy cat", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick brown fox jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
        writer.close();  
  
        IndexReader reader = DirectoryReader.open(dir);  
        IndexSearcher searcher = new IndexSearcher(reader);  
          
        String queryString = "red";  
        SpanQuery query = new SpanTermQuery(new Term("text",queryString));  
          
        TopDocs results = searcher.search(query, null, 100);  
        ScoreDoc[] scoreDocs = results.scoreDocs;  
          
        for (int i = 0; i < scoreDocs.length; ++i) {  
            //System.out.println(searcher.explain(query, scoreDocs[i].doc));  
            int docID = scoreDocs[i].doc;  
            Document document = searcher.doc(docID);  
            String path = document.get("text");  
            System.out.println("text:" + path);  
        }  
    }  
}  

SpanNearQuery：用来匹配两个Term之间的跨度的，即一个Term经过几个跨度可以到达另一个Term,slop为跨度因子，用来限制两个Term之间的最大跨度，不可能一个Term和另一个Term之间要经过十万八千个跨度才到达也算两者相近，这不符合常理。所以有个slop因子进行限制。还有一个inOrder参数要引起注意，它用来设置是否允许进行倒序跨度，什么意思？即TermA到TermB不一定是从左到右去匹配也可以从右到左，而从右到左就是倒序，inOrder为true即表示order(顺序)很重要不能倒序去匹配必须正向去匹配，false则反之。注意停用词不在slop统计范围内。

Slop的理解很重要：

在默认情况下slop的值是0，就相当于TermQuery的精确匹配。通过设置slop参数可以放宽匹配条件，比如"one five"匹配"one two three four five"就需要slop=3，如果slop=2就无法得到结果。这里我们可以认为slop是单词移动的次数，可以左移或者右移。这里特别提醒，PhraseQuery不保证前后单词的次序，在上面的例子中，"two one"就需要2个slop，也就是认为one向左边移动2位，就能够匹配"one two"；如果是"five three one"，就需要slop=6才能匹配。

还有一个collectPayloads参数表示是否收集payload信息，关于payload后面再单独说。

SpanNearQuery的构造函数如下：

   Java代码  
    
  
public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder, boolean collectPayloads) {  
  
    // copy clauses array into an ArrayList  
    this.clauses = new ArrayList<>(clauses.length);  
    for (int i = 0; i < clauses.length; i++) {  
      SpanQuery clause = clauses[i];  
      if (field == null) {                               // check field  
        field = clause.getField();  
      } else if (clause.getField() != null && !clause.getField().equals(field)) {  
        throw new IllegalArgumentException("Clauses must have same field.");  
      }  
      this.clauses.add(clause);  
    }  
    this.collectPayloads = collectPayloads;  
    this.slop = slop;  
    this.inOrder = inOrder;  
  }  

SpanNearQuery使用示例：

   Java代码  
    
  
/** 
 * SpanNearQuery测试 
 * @author Lanxiaowei 
 * 
 */  
public class SpanNearQueryTest {  
    public static void main(String[] args) throws IOException {  
        Directory dir = new RAMDirectory();  
        Analyzer analyzer = new StandardAnalyzer();  
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);  
        iwc.setOpenMode(OpenMode.CREATE);  
        IndexWriter writer = new IndexWriter(dir, iwc);  
  
        Document doc = new Document();  
        doc.add(new TextField("text", "the quick brown fox jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick red fox jumps over the sleepy cat", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick brown fox jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
        writer.close();  
  
        IndexReader reader = DirectoryReader.open(dir);  
        IndexSearcher searcher = new IndexSearcher(reader);  
          
        String queryStringStart = "dog";  
        String queryStringEnd = "quick";  
        SpanQuery queryStart = new SpanTermQuery(new Term("text",queryStringStart));  
        SpanQuery queryEnd = new SpanTermQuery(new Term("text",queryStringEnd));  
        SpanQuery spanNearQuery = new SpanNearQuery(  
            new SpanQuery[] {queryStart,queryEnd}, 6, false, false);  
          
        TopDocs results = searcher.search(spanNearQuery, null, 100);  
        ScoreDoc[] scoreDocs = results.scoreDocs;  
          
        for (int i = 0; i < scoreDocs.length; ++i) {  
            //System.out.println(searcher.explain(query, scoreDocs[i].doc));  
            int docID = scoreDocs[i].doc;  
            Document document = searcher.doc(docID);  
            String path = document.get("text");  
            System.out.println("text:" + path);  
        }  
    }  
}  

示例中dog要到达quick需要经过6个跨度，需要从右至左倒序匹配，所以inOrder设置为false,如果设置为true会导致查询不出来数据。

SpanNotQuery:使用场景是当使用SpanNearQuery时，如果两个Term从TermA到TermB有多种情况，即可能出现TermA或者TermB在索引中重复出现，则可能有多种情况，SpanNotQuery就是用来限制TermA和TermB之间不存在TermC,从而排除一些情况，实现更精确的控制。默认SpanNotQuery的构造函数是这样的：

   Java代码  
    
  
/**
   * Constructs a SpanNotQuery matching spans from <code>include</code> which
   * have no overlap with spans from <code>exclude</code>.
   *
   * <p>Delegates to the four-argument constructor with both extra arguments
   * set to 0.
   *
   * @param include query whose spans are the candidates for matching
   * @param exclude query whose spans must not overlap a candidate
   */
  public SpanNotQuery(SpanQuery include, SpanQuery exclude) {
     this(include, exclude, 0, 0);
  }

显然这里的第一个参数include应该是SpanNearQuery，第二个参数就是用来做排除的。

SpanNotQuery另一个重载构造函数如下：

   Java代码  
    
  
/**
   * Constructs a SpanNotQuery matching spans from <code>include</code> which
   * have no overlap with spans from <code>exclude</code> within
   * <code>dist</code> tokens of <code>include</code>.
   *
   * <p>Delegates to the four-argument constructor, passing <code>dist</code>
   * for both of its extra arguments.
   *
   * @param include query whose spans are the candidates for matching
   * @param exclude query whose spans must not overlap a candidate
   * @param dist    number of tokens around <code>include</code> that must also be free of <code>exclude</code>
   */
  public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
     this(include, exclude, dist, dist);
  }
    

它多加了一个dist参数，官方的解释是：Construct a SpanNotQuery matching spans from include which have no overlap with spans from exclude within dist tokens of include. 说白了就是，exclude不仅不能出现在include匹配到的跨度内部，连该跨度前后各dist个token（是词，而不是字符）的范围内也不能出现，这就是dist的作用。

SpanNotQuery还有一个更复杂的构造函数重载：

   Java代码  
    
  
/** Construct a SpanNotQuery matching spans from <code>include</code> which 
   * have no overlap with spans from <code>exclude</code> within  
   * <code>pre</code> tokens before or <code>post</code> tokens of <code>include</code>. */  
  public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {  
    this.include = include;  
    this.exclude = exclude;  
    this.pre = (pre >=0) ? pre : 0;  
    this.post = (post >= 0) ? post : 0;  
  
    if (include.getField() != null && exclude.getField() != null && !include.getField().equals(exclude.getField()))  
      throw new IllegalArgumentException("Clauses must have same field.");  
  }  

pre和post把dist拆成了前后两个方向：pre表示include跨度之前的pre个token内不能出现exclude，post表示include跨度之后的post个token内不能出现exclude（上一个构造函数相当于pre和post都等于dist，传入负数会被当作0）。这样解释太抽象，用示例代码来说明吧：

   Java代码  
    
  
package com.yida.framework.lucene5.query;  
  
import java.io.IOException;  
  
import org.apache.lucene.analysis.Analyzer;  
import org.apache.lucene.analysis.standard.StandardAnalyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.document.TextField;  
import org.apache.lucene.index.DirectoryReader;  
import org.apache.lucene.index.IndexReader;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.IndexWriterConfig;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.index.IndexWriterConfig.OpenMode;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.ScoreDoc;  
import org.apache.lucene.search.TopDocs;  
import org.apache.lucene.search.spans.SpanNearQuery;  
import org.apache.lucene.search.spans.SpanNotQuery;  
import org.apache.lucene.search.spans.SpanQuery;  
import org.apache.lucene.search.spans.SpanTermQuery;  
import org.apache.lucene.store.Directory;  
import org.apache.lucene.store.RAMDirectory;  
  
/** 
 * SpanNotQuery测试 
 * @author Lanxiaowei 
 * 
 */  
public class SpanNotQueryTest {  
    public static void main(String[] args) throws IOException {  
        Directory dir = new RAMDirectory();  
        Analyzer analyzer = new StandardAnalyzer();  
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);  
        iwc.setOpenMode(OpenMode.CREATE);  
        IndexWriter writer = new IndexWriter(dir, iwc);  
  
        Document doc = new Document();  
        doc.add(new TextField("text", "the quick brown fox jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick red fox jumps over the sleepy cat", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick brown fox quick gox jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick brown adult slave nice fox winde felt testcase gox quick jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick brown fox quick jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
        writer.close();  
  
        IndexReader reader = DirectoryReader.open(dir);  
        IndexSearcher searcher = new IndexSearcher(reader);  
          
        String queryStringStart = "dog";  
        String queryStringEnd = "quick";  
        String excludeString = "fox";  
        SpanQuery queryStart = new SpanTermQuery(new Term("text",queryStringStart));  
        SpanQuery queryEnd = new SpanTermQuery(new Term("text",queryStringEnd));  
        SpanQuery excludeQuery = new SpanTermQuery(new Term("text",excludeString));  
        SpanQuery spanNearQuery = new SpanNearQuery(  
            new SpanQuery[] {queryStart,queryEnd}, 12, false, false);  
          
        SpanNotQuery spanNotQuery = new SpanNotQuery(spanNearQuery, excludeQuery, 4,3);  
        TopDocs results = searcher.search(spanNotQuery, null, 100);  
        ScoreDoc[] scoreDocs = results.scoreDocs;  
          
        for (int i = 0; i < scoreDocs.length; ++i) {  
            //System.out.println(searcher.explain(query, scoreDocs[i].doc));  
            int docID = scoreDocs[i].doc;  
            Document document = searcher.doc(docID);  
            String path = document.get("text");  
            System.out.println("text:" + path);  
        }  
    }  
}  

示例代码意思就是查询dog和quick之间没有fox的索引文档，自己运行示例代码参悟吧。

SpanOrQuery顾名思义就是把多个SpanQuery用or连接起来，其实你也可以用BooleanQuery来代替SpanOrQuery,但SpanOrQuery会返回额外的Span跨度信息，它的构造函数如下：

   Java代码  
    
SpanOrQuery(SpanQuery... clauses)

接收多个SpanQuery对象并用or连接起来，下面是SpanOrQuery示例代码：

   Java代码  
    
  
package com.yida.framework.lucene5.query;  
  
import java.io.IOException;  
  
import org.apache.lucene.analysis.Analyzer;  
import org.apache.lucene.analysis.standard.StandardAnalyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.document.TextField;  
import org.apache.lucene.index.DirectoryReader;  
import org.apache.lucene.index.IndexReader;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.IndexWriterConfig;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.index.IndexWriterConfig.OpenMode;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.ScoreDoc;  
import org.apache.lucene.search.TopDocs;  
import org.apache.lucene.search.spans.SpanNearQuery;  
import org.apache.lucene.search.spans.SpanNotQuery;  
import org.apache.lucene.search.spans.SpanOrQuery;  
import org.apache.lucene.search.spans.SpanQuery;  
import org.apache.lucene.search.spans.SpanTermQuery;  
import org.apache.lucene.store.Directory;  
import org.apache.lucene.store.RAMDirectory;  
  
/** 
 * SpanOrQuery测试 
 * @author Lanxiaowei 
 * 
 */  
public class SpanOrQueryTest {  
    public static void main(String[] args) throws IOException {  
        Directory dir = new RAMDirectory();  
        Analyzer analyzer = new StandardAnalyzer();  
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);  
        iwc.setOpenMode(OpenMode.CREATE);  
        IndexWriter writer = new IndexWriter(dir, iwc);  
  
        Document doc = new Document();  
        doc.add(new TextField("text", "the quick brown fox jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick red fox jumps over the sleepy cat", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick brown fox quick gox jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick brown adult slave nice fox winde felt testcase gox quick jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick brown adult sick slave nice fox winde felt testcase fox quick jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "the quick brown fox quick jumps over the lazy dog", Field.Store.YES));  
        writer.addDocument(doc);  
        writer.close();  
  
        IndexReader reader = DirectoryReader.open(dir);  
        IndexSearcher searcher = new IndexSearcher(reader);  
          
        String queryStringStart = "dog";  
        String queryStringEnd = "quick";  
        String excludeString = "fox";  
        String termString = "sick";  
        SpanQuery queryStart = new SpanTermQuery(new Term("text",queryStringStart));  
        SpanQuery queryEnd = new SpanTermQuery(new Term("text",queryStringEnd));  
        SpanQuery excludeQuery = new SpanTermQuery(new Term("text",excludeString));  
        SpanQuery spanNearQuery = new SpanNearQuery(  
            new SpanQuery[] {queryStart,queryEnd}, 12, false, false);  
          
        SpanNotQuery spanNotQuery = new SpanNotQuery(spanNearQuery, excludeQuery, 4,3);  
          
        SpanQuery spanTermQuery = new SpanTermQuery(new Term("text",termString));  
          
        SpanOrQuery spanOrQuery = new SpanOrQuery(spanNotQuery,spanTermQuery);  
          
        TopDocs results = searcher.search(spanOrQuery, null, 100);  
        ScoreDoc[] scoreDocs = results.scoreDocs;  
          
        for (int i = 0; i < scoreDocs.length; ++i) {  
            //System.out.println(searcher.explain(query, scoreDocs[i].doc));  
            int docID = scoreDocs[i].doc;  
            Document document = searcher.doc(docID);  
            String path = document.get("text");  
            System.out.println("text:" + path);  
        }  
    }  
}  

SpanMultiTermQueryWrapper:就是一个Query转换器，用于把MultiTermQuery包装转换成SpanQuery的，具体使用示例，我贴下官方API里提供的示例代码吧：

   Java代码  
    
  
WildcardQuery wildcard = new WildcardQuery(new Term("field", "bro?n"));  
 SpanQuery spanWildcard = new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard);  

SpanPositionRangeQuery:这个query是用来限制匹配的情况是否分布在(start,end)这个区间内，区间索引从零开始计算，拿示例代码说话，

   Java代码  
    
  
package com.yida.framework.lucene5.query;  
  
import java.io.IOException;  
  
import org.apache.lucene.analysis.Analyzer;  
import org.apache.lucene.analysis.standard.StandardAnalyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.document.TextField;  
import org.apache.lucene.index.DirectoryReader;  
import org.apache.lucene.index.IndexReader;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.IndexWriterConfig;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.index.IndexWriterConfig.OpenMode;  
import org.apache.lucene.search.FuzzyQuery;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.ScoreDoc;  
import org.apache.lucene.search.TopDocs;  
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;  
import org.apache.lucene.search.spans.SpanNearQuery;  
import org.apache.lucene.search.spans.SpanNotQuery;  
import org.apache.lucene.search.spans.SpanPositionRangeQuery;  
import org.apache.lucene.search.spans.SpanQuery;  
import org.apache.lucene.search.spans.SpanTermQuery;  
import org.apache.lucene.store.Directory;  
import org.apache.lucene.store.RAMDirectory;  
  
/** 
 * SpanPositionRangeQuery测试 
 * @author Lanxiaowei 
 * 
 */  
public class SpanPositionRangeQueryTest {  
    public static void main(String[] args) throws IOException {  
        Directory dir = new RAMDirectory();  
        Analyzer analyzer = new StandardAnalyzer();  
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);  
        iwc.setOpenMode(OpenMode.CREATE);  
        IndexWriter writer = new IndexWriter(dir, iwc);  
  
        Document doc = new Document();  
        doc.add(new TextField("text", "quick brown fox", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "jumps over lazy broun dog", Field.Store.YES));  
        writer.addDocument(doc);  
          
        doc = new Document();  
        doc.add(new TextField("text", "jumps over extremely very lazy broxn dog", Field.Store.YES));  
        writer.addDocument(doc);  
          
          
        writer.close();  
  
        IndexReader reader = DirectoryReader.open(dir);  
        IndexSearcher searcher = new IndexSearcher(reader);  
          
        FuzzyQuery fq = new FuzzyQuery(new Term("text", "broan"));  
        SpanQuery sfq = new SpanMultiTermQueryWrapper<FuzzyQuery>(fq);  
          
        SpanPositionRangeQuery spanPositionRangeQuery = new SpanPositionRangeQuery(sfq, 3, 5);  
          
        TopDocs results = searcher.search(spanPositionRangeQuery, null, 100);  
        ScoreDoc[] scoreDocs = results.scoreDocs;  
          
        for (int i = 0; i < scoreDocs.length; ++i) {  
            //System.out.println(searcher.explain(query, scoreDocs[i].doc));  
            int docID = scoreDocs[i].doc;  
            Document document = searcher.doc(docID);  
            String path = document.get("text");  
            System.out.println("text:" + path);  
        }  
    }  
}  

稍微解释下上面的代码，首先呢，FuzzyQuery fq = new FuzzyQuery(new Term("text", "broan"));用来查询包含跟单词broan相似的词的索引文档。注意3个索引文档里的brown、broun、broxn和broan都只差一个字符，所以单靠FuzzyQuery这3个索引文档都能匹配到。然后呢，我们new了一个SpanQuery包装器Wrapper，把FuzzyQuery转换成了SpanQuery,然后使用SpanPositionRangeQuery对匹配到的位置进行限制，即跟broan相似的单词必须分布在(3,5)这个区间内（位置从0开始计算）。第1个索引文档里brown的位置是1，不在区间内，被排除；第3个索引文档里broxn的位置是5，跨度的结束位置是6，超出了end=5，也被排除；最后只返回第2个索引文档（broun的位置是3，结束位置是4）。

SpanPositionRangeQuery还有个子类SpanFirstQuery,其实SpanFirstQuery只不过是把SpanPositionRangeQuery构造函数里的start参数值设置为0，仅此而已，所以不用多说，你也懂的，它的构造函数如下：

   Java代码  
    
  
SpanFirstQuery(SpanQuery match, int end)   
Construct a SpanFirstQuery matching spans in match whose end position is less than or equal to end.  

这也就是为什么只有一个end,没有start,因为start默认为零（SpanFirstQuery的构造函数内部直接调用了super(match, 0, end)）。

SpanFirstQuery示例我就不提供了，略过。

最后一个要说的就是FieldMaskingSpanQuery，它用于在多个域之间查询，即把另一个域看作某个域，从而看起来就像在同一个域里查询，因为Lucene默认某个条件只能作用在单个域上，不支持跨域查询只能在同一个域里查询，所以有了FieldMaskingSpanQuery，下面是示例代码：

   Java代码  
    
  
package com.yida.framework.lucene5.query;  
  
import java.io.IOException;  
  
import org.apache.lucene.analysis.Analyzer;  
import org.apache.lucene.analysis.standard.StandardAnalyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.index.DirectoryReader;  
import org.apache.lucene.index.IndexReader;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.IndexWriterConfig;  
import org.apache.lucene.index.IndexWriterConfig.OpenMode;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.Query;  
import org.apache.lucene.search.ScoreDoc;  
import org.apache.lucene.search.TopDocs;  
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;  
import org.apache.lucene.search.spans.SpanNearQuery;  
import org.apache.lucene.search.spans.SpanQuery;  
import org.apache.lucene.search.spans.SpanTermQuery;  
import org.apache.lucene.store.Directory;  
import org.apache.lucene.store.RAMDirectory;  
  
/** 
 * FieldMaskingSpanQuery测试 
 * @author Lanxiaowei 
 * 
 */  
public class FieldMaskingSpanQueryTest {  
    public static void main(String[] args) throws IOException {  
        Directory dir = new RAMDirectory();  
        Analyzer analyzer = new StandardAnalyzer();  
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);  
        iwc.setOpenMode(OpenMode.CREATE);  
        IndexWriter writer = new IndexWriter(dir, iwc);  
  
        Document doc = new Document();  
  
        doc.add(new Field("teacherid", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));  
  
        doc.add(new Field("studentfirstname", "james", Field.Store.YES, Field.Index.NOT_ANALYZED));  
          
        doc.add(new Field("studentsurname", "jones", Field.Store.YES, Field.Index.NOT_ANALYZED));  
  
        writer.addDocument(doc);  
          
          
        //teacher2  
        doc = new Document();  
  
        doc.add(new Field("teacherid", "2", Field.Store.YES, Field.Index.NOT_ANALYZED));  
  
        doc.add(new Field("studentfirstname", "james", Field.Store.YES, Field.Index.NOT_ANALYZED));  
  
        doc.add(new Field("studentsurname", "smith", Field.Store.YES, Field.Index.NOT_ANALYZED));  
  
        doc.add(new Field("studentfirstname", "sally", Field.Store.YES, Field.Index.NOT_ANALYZED));  
  
        doc.add(new Field("studentsurname", "jones", Field.Store.YES, Field.Index.NOT_ANALYZED));  
  
        writer.addDocument(doc);  
          
        writer.close();  
  
        IndexReader reader = DirectoryReader.open(dir);  
        IndexSearcher searcher = new IndexSearcher(reader);  
          
        SpanQuery q1  = new SpanTermQuery(new Term("studentfirstname", "james"));  
        SpanQuery q2  = new SpanTermQuery(new Term("studentsurname", "jones"));  
          
        SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname");  
  
        Query query = new SpanNearQuery(new SpanQuery[]{q1, q2m}, -1, false);  
        TopDocs results = searcher.search(query, null, 100);  
        ScoreDoc[] scoreDocs = results.scoreDocs;  
          
        for (int i = 0; i < scoreDocs.length; ++i) {  
            //System.out.println(searcher.explain(query, scoreDocs[i].doc));  
            int docID = scoreDocs[i].doc;  
            Document document = searcher.doc(docID);  
            String teacherid = document.get("teacherid");  
            System.out.println("teacherid:" + teacherid);  
        }  
    }  
}  

OK，SpanQuery就说这么多，接下来要说的就是PhraseQuery。

如果你还有什么问题请加我Ｑ-Q：7-3-6-0-3-1-3-0-5，

或者加裙
一起交流学习！

转载：http://iamyida.iteye.com/blog/2195761

Lucene5学习之SpanQuery跨度查询

热门文章

最新文章

相关课程

相关电子书

相关实验场景