zoukankan      html  css  js  c++  java
  • lucene 3.0.2 search 各种各样的Query类型

    http://blog.sina.com.cn/s/blog_61d2047c010195mo.html
     
     
    lucene的这种各种各样的查询类型
    1、TermQuery 
         最简单的Query类型,某一个field是否含有一个term的value
     
    2、TermRangeQuery 
         由于term在index中是按照字典顺序排列的,可以使用TermRangeQuery查询一个范围内的Term
    例如
    Query query = new TermRangeQuery("city", "aa", "am", true, true);
    TopDocs hits = searcher.search(query, 20);
     
    可以查询字典序位于aa和am之间的term(例如aa*、ab* ..... al*,以及am本身)。 后面的两个true分别代表是否包括下界aa和上界am
     
    3、NumericRangeQuery 
    查询一个数值的范围。 被查询的字段必须是NumericField
    Query query = NumericRangeQuery.newIntRange("intID", from, to, true,true);
    TopDocs hits = searcher.search(query, 20);
     
    4、PrefixQuery 前缀查询
          查询一个term是否满足一个前缀。
         比如 prefix =“bri” bridge和“bright”都可以满足
    Term t = new Term(field, prefix);
    Query query = new PrefixQuery(t);
    TopDocs hits = searcher.search(query, 20);
     
    5、BooleanQuery 联合多个查找
    Term t = new Term("contents", "bri");
    Query query1 = new PrefixQuery(t);
    Query query2 = NumericRangeQuery.newIntRange("intID", 1, 3, true, true);
     
    // create a boolean query
    BooleanQuery query = new BooleanQuery();
    query.add(query1, BooleanClause.Occur.SHOULD);
    query.add(query2, BooleanClause.Occur.MUST);
     
    TopDocs hits = searcher.search(query, 20);
    注意BooleanClause.Occur.MUST是and的意思,BooleanClause.Occur.SHOULD是or的意思,BooleanClause.Occur.MUST_NOT是not的意思
     
    6、PhraseQuery 短语查询
         我们想查询一个短语  fox quick 或者 quick fox 或者quick brown fox,或者quick red fox。
         可以使用phraseQuery,  PhraseQuery使用Edit distance(编辑距离) 来量度,编辑距离是一个字符串变化到另一个字符串需要的替换,删除,插入的次数总和。每一次这种操作叫做一次slop。可以使用setSlop来限制短语slop的最大值。
    edit distance如下图
     
        (图:lucene in action 第三章(2) search 各种各样的Query类型)
     
    比如: quick fox 到quick [xxx] fox 需要 1 slop
    fox quick 到 quick [xxx] fox 需要 3 slop 先用quick替换 fox,再用fox替换quick,再插入一个xxx 总共3次。
         PhraseQuery query = new PhraseQuery();
     
    // set max slop to 10
    query.setSlop(10);
    query.add(new Term("contents", " quick " ));
    query.add(new Term("contents", " fox"));
    TopDocs hits = searcher.search(query, 20);
     
    7、WildcardQuery通配符查询
         PrefixQuery是WildcardQuery 的特殊形式
         *代表0个或者多个字符,?代表恰好一个字符
                    // use wildchard "?ridg*"
    WildcardQuery query = new WildcardQuery(new Term("contents", "?ridg*"));
    TopDocs hits = searcher.search(query, 20);
     
    8、FuzzyQuery  模糊查询
        FuzzyQuery与PhraseQuery类似,都是以Edit distance 来做的,只不过 FuzzyQuery是在term内部,而PhraseQuery是在term之间。   
     例如    FuzzyQuery query = new FuzzyQuery(new Term("contents", "Amsteedam")); 可以查出 Amsterdam,他们之间的编辑距离是1。
    如下
     IndexSearcher searcher = new IndexSearcher(dir);
    // "Amsterdam" is similar to "Amsteedam"
    FuzzyQuery query = new FuzzyQuery(new Term("contents", "Amsteedam"));
    TopDocs hits = searcher.search(query, 20);
    showResult(hits, searcher);
     
    package charpter3;
     
    import java.io.File;
    import java.io.IOException;
     
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Field.TermVector;
    import org.apache.lucene.document.NumericField;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.queryParser.ParseException;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.BooleanClause;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.FuzzyQuery;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.NumericRangeQuery;
    import org.apache.lucene.search.PhraseQuery;
    import org.apache.lucene.search.PrefixQuery;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TermRangeQuery;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.WildcardQuery;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
     
    public class Querys {
    private IndexWriter writer;
    protected String[] ids = { "1", "2", "3" };
    protected String[] unindexed = { "Netherlands", "Italy", "China" };
    protected String[] unstored = { "Amsterdam has a lot of bridge",
    "Venice has lots of canals", "Amsterddam bridges are a lot" };
    protected String[] text = { "Amsterdam", "Venice", "Aeijing" };
     
    private Directory dir = null;
    private IndexReader indexReader = null;
     
    public Querys(String indexDir) throws IOException {
    dir = FSDirectory.open(new File(indexDir));
    this.writer = new IndexWriter(dir, new StandardAnalyzer(
    Version.LUCENE_36), true, IndexWriter.MaxFieldLength.UNLIMITED);
    this.writer.setInfoStream(System.out);
     
    // create a index reader instance
    indexReader = IndexReader.open(dir);
    }
     
     
    public void addDocuments() throws CorruptIndexException, IOException {
    for (int i = 0; i < ids.length; i++) {
    Document doc = new Document();
     
    NumericField nfield = new NumericField("intID", 10);
    nfield.setIntValue(i);
    doc.add(nfield);
     
    doc.add(new Field("id", ids[i], Field.Store.YES,
    Field.Index.NOT_ANALYZED));
    doc.add(new Field("country", unindexed[i], Field.Store.YES,
    Field.Index.NO));
    doc.add(new Field("contents", unstored[i], Field.Store.YES,
    Field.Index.ANALYZED));
    doc.add(new Field("city", text[i], Field.Store.YES,
    Field.Index.ANALYZED));
    writer.addDocument(doc);
     
    }
     
    System.out.println("docs = " + writer.numDocs());
     
    }
     
    public void index() throws CorruptIndexException, IOException {
    this.addDocuments();
    this.commit();
    }
     
     
    public void expressionQuery() throws CorruptIndexException, IOException,
    ParseException {
     
    IndexSearcher searcher = new IndexSearcher(this.indexReader);
     
    QueryParser praser = new QueryParser(Version.LUCENE_CURRENT,
    "contents", new StandardAnalyzer(Version.LUCENE_CURRENT));
     
    // note
    Query query = praser.parse("+bridge -Amsterdam");
    System.out.println("query = " + query.toString());
    TopDocs hits = searcher.search(query, 20);
    showResult(hits, searcher);
     
    }
     
     
    public void termQuery(String fieldName, String q)
    throws CorruptIndexException, IOException, ParseException {
    // IndexSearcher searcher = new IndexSearcher(dir);
     
    // build a indexSearch on a indexReader
    IndexSearcher searcher = new IndexSearcher(this.indexReader);
     
    Term t = new Term(fieldName, q.toLowerCase());
    Query query = new TermQuery(t);
    TopDocs hits = searcher.search(query, 20);
    showResult(hits, searcher);
    }
     
     
    public void termRangeQuery(String fieldName, String q)
    throws CorruptIndexException, IOException, ParseException {
    IndexSearcher searcher = new IndexSearcher(dir);
     
    Query query = new TermRangeQuery("city", "aa", "am", true, true);
    TopDocs hits = searcher.search(query, 20);
    showResult(hits, searcher);
    }
     
     
    public void numericRangeQuery(int from, int to)
    throws CorruptIndexException, IOException, ParseException {
    IndexSearcher searcher = new IndexSearcher(dir);
     
    Query query = NumericRangeQuery.newIntRange("intID", from, to, true,true);
    TopDocs hits = searcher.search(query, 20);
    showResult(hits, searcher);
    }
     
     
    public void prefixQuery(String field, String prefix)
    throws CorruptIndexException, IOException, ParseException {
    IndexSearcher searcher = new IndexSearcher(dir);
     
    Term t = new Term(field, prefix);
    Query query = new PrefixQuery(t);
    TopDocs hits = searcher.search(query, 20);
    showResult(hits, searcher);
    }
     
     
    public void booleanQuery() throws CorruptIndexException, IOException,
    ParseException {
    IndexSearcher searcher = new IndexSearcher(dir);
     
    Term t = new Term("contents", "bri");
    Query query1 = new PrefixQuery(t);
     
    Query query2 = NumericRangeQuery.newIntRange("intID", 1, 3, true, true);
     
    // create a boolean query
    BooleanQuery query = new BooleanQuery();
    query.add(query1, BooleanClause.Occur.SHOULD);
    query.add(query2, BooleanClause.Occur.MUST);
     
    TopDocs hits = searcher.search(query, 20);
     
    showResult(hits, searcher);
     
    }
     
     
    public void phraseQuery() throws CorruptIndexException, IOException,
    ParseException {
    IndexSearcher searcher = new IndexSearcher(dir);
    PhraseQuery query = new PhraseQuery();
     
    // set max slop to 10
    query.setSlop(10);
    query.add(new Term("contents", "lot"));
    query.add(new Term("contents", "bridges"));
    TopDocs hits = searcher.search(query, 20);
     
    showResult(hits, searcher);
     
    }
     
     
    public void wildCardQuery() throws CorruptIndexException, IOException,
    ParseException {
    IndexSearcher searcher = new IndexSearcher(dir);
     
    // use wildchard "?ridg*"
    WildcardQuery query = new WildcardQuery(new Term("contents", "?ridg*"));
    TopDocs hits = searcher.search(query, 20);
     
    showResult(hits, searcher);
    }
     
     
    public void fuzzyQuery() throws CorruptIndexException, IOException,
    ParseException {
    IndexSearcher searcher = new IndexSearcher(dir);
     
    // "Amsterdam" is similar to "Amsteedam"
    FuzzyQuery query = new FuzzyQuery(new Term("contents", "Amsteedam"));
    TopDocs hits = searcher.search(query, 20);
    showResult(hits, searcher);
     
    }
     
     
    public void testReopen() throws ParseException, IOException {
     
    IndexSearcher searcher = new IndexSearcher(this.indexReader);
     
    QueryParser praser = new QueryParser(Version.LUCENE_CURRENT,
    "contents", new StandardAnalyzer(Version.LUCENE_CURRENT));
     
    // note
    Query query = praser.parse("+bridge -Amsterdam");
    System.out.println("query = " + query.toString());
     
    TopDocs hits = searcher.search(query, 20);
     
    // reopen a index and will cover current modification of index.
    IndexReader newReader = indexReader.reopen();
    if (indexReader != newReader) {
    indexReader = newReader;
     
    // if indexReader is changed , searcher must be constructed.
    searcher.close();
    searcher = null;
    searcher = new IndexSearcher(this.indexReader);
    }
     
    hits = searcher.search(query, 20);
     
    showResult(hits, searcher);
     
    }
     
     
    public void testTopDocs() throws CorruptIndexException, IOException {
    IndexSearcher searcher = new IndexSearcher(dir);
     
    // "Amsterdam" is similar to "Amsteedam"
    FuzzyQuery query = new FuzzyQuery(new Term("contents", "Amsteedam"));
    TopDocs hits = searcher.search(query, 20);
     
    System.out.println("search result:");
     
    for (ScoreDoc doc : hits.scoreDocs) {
    // 閸欐牕绶遍崨鎴掕厬閻ㄥ嫭鏋冨�锟�
    Document d = searcher.doc(doc.doc);
    System.out.println(d.get("contents"));
    }
    }
     
    public void commit() throws CorruptIndexException, IOException {
    this.writer.commit();
    }
     
     
    public void showResult(TopDocs hits, IndexSearcher searcher) {
     
    try {
    System.out.println("search result:");
     
    for (ScoreDoc doc : hits.scoreDocs) {
    // 閸欐牕绶遍崨鎴掕厬閻ㄥ嫭鏋冨�锟�
    Document d = searcher.doc(doc.doc);
    System.out.println(d.get("contents"));
    }
    } catch (Exception e) {
    e.printStackTrace();
    }
    }
     
     
    public static void main(String[] args) throws IOException, ParseException {
    // TODO Auto-generated method stub
    Querys ci = new Querys("charpter2-1");
    ci.index();
    System.out.println("----------termQuery--------------");
    ci.termQuery("city", "Venice");
     
    System.out.println("----------termRangeQuery--------------");
    ci.termRangeQuery(null, null);
     
    System.out.println("----------numericRangeQuery--------------");
    ci.numericRangeQuery(1, 5);
     
    System.out.println("----------prefixQuery--------------");
    ci.prefixQuery("contents", "bri");
     
    System.out.println("----------booleanQuery--------------");
    ci.booleanQuery();
     
    System.out.println("----------phraseQuery--------------");
    ci.phraseQuery();
     
    System.out.println("----------wildCardQuery--------------");
    ci.wildCardQuery();
     
    System.out.println("----------fuzzyQuery--------------");
    ci.fuzzyQuery();
     
    System.out.println("----------expressionQuery--------------");
    ci.expressionQuery();
     
    System.out.println("----------test reopen--------------");
    ci.testReopen();
     
    }
     
    }
  • 相关阅读:
    [OS] 信号量(Semaphore)
    [OS] 进程互斥
    [剑指Offer] 52.正则表达式匹配
    [剑指Offer] 51.构建乘积数组
    [剑指Offer] 50.数组中重复的数字
    [剑指Offer] 49.把字符串转换成整数
    [剑指Offer] 48.不用加减乘除做加法
    [剑指Offer] 47.求1+2+3+...+n
    PHP知识库图谱汇总(完善中)
    修改thinkphp路由模式,去掉Home
  • 原文地址:https://www.cnblogs.com/XDJjy/p/4363571.html
Copyright © 2011-2022 走看看