zoukankan      html  css  js  c++  java
  • 对电影导演、演员、名称几个重要字段建立 Lucene 索引(含定时重建索引的线程)及使用

    package cn.com.douban.movie.lucene;
    
    import java.io.IOException;
    import java.sql.SQLException;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.queryParser.ParseException;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.BooleanClause;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocCollector;
    import org.apache.lucene.search.TopDocs;
    
    import com.sun.org.apache.bcel.internal.generic.ARRAYLENGTH;
    
    import cn.com.douban.movie.dao.MovieDao;
    import cn.com.douban.movie.daoimp.MovieDaoImp;
    import cn.com.douban.movie.entity.Movie;
    
    public class TestLucene {
     
    /**
     * Rebuilds the movie index from scratch.
     *
     * <p>Each movie returned by {@code MovieDao.SelectAllMovie2()} becomes one document. The
     * {@code mname}, {@code actor} and {@code dir} fields are analyzed (searchable); every other
     * field is stored only, so it can be read back from a hit but not searched.
     *
     * <p>The movies are loaded <em>before</em> the writer is opened: the writer is created with
     * {@code create=true}, which replaces any existing index, so a database failure must not be
     * allowed to wipe a good index.
     *
     * @param url directory that holds the index files; an existing index there is replaced
     * @throws CorruptIndexException declared by the underlying {@code IndexWriter} calls
     * @throws IOException if the movies cannot be loaded (the original cause is attached) or the
     *         index cannot be written
     */
    public void createLucene(String url) throws CorruptIndexException, IOException{
        List<Movie> movies = loadMovies();

        Analyzer analyzer = new StandardAnalyzer();
        // Arguments: index directory, analyzer, create=true (overwrite an existing index),
        // and no limit on the number of terms indexed per field.
        IndexWriter writer = new IndexWriter(url, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        boolean finished = false;
        try {
            int sequence = 0;
            for (Movie movie : movies) {
                sequence++;
                writer.addDocument(toDocument(sequence, movie));
            }
            writer.optimize();
            writer.close();
            finished = true;
        } finally {
            if (!finished) {
                // Close the writer without committing the partial rebuild; otherwise the
                // writer (and its lock on the index directory) would stay open after a failure.
                writer.rollback();
            }
        }
    }

    /** Loads all movies, converting DAO failures into an IOException that keeps the cause. */
    private List<Movie> loadMovies() throws IOException {
        MovieDao dao = new MovieDaoImp();
        try {
            return dao.SelectAllMovie2();
        } catch (ClassNotFoundException e) {
            throw loadFailure(e);
        } catch (SQLException e) {
            throw loadFailure(e);
        }
    }

    /** Builds the IOException reported when the movies cannot be read from the database. */
    private static IOException loadFailure(Exception cause) {
        IOException failure = new IOException("Could not load movies for indexing: " + cause);
        // initCause instead of the (String, Throwable) constructor, which needs Java 6+.
        failure.initCause(cause);
        return failure;
    }

    /** Converts one movie into an index document; sequence is the 1-based position in the load order. */
    private static Document toDocument(int sequence, Movie movie) {
        Document document = new Document();
        addStoredField(document, "id", sequence + "", Field.Index.NO);
        addStoredField(document, "mid", movie.getMovie_id() + "", Field.Index.NO);
        addStoredField(document, "mname", movie.getMname(), Field.Index.ANALYZED);
        addStoredField(document, "dir", movie.getDirector(), Field.Index.ANALYZED);
        addStoredField(document, "actor", movie.getActor(), Field.Index.ANALYZED);
        addStoredField(document, "area", movie.getArea(), Field.Index.NO);
        addStoredField(document, "type", movie.getType(), Field.Index.NO);
        addStoredField(document, "pud", movie.getPubdate(), Field.Index.NO);
        addStoredField(document, "count", movie.getComm_count() + "", Field.Index.NO);
        addStoredField(document, "img", movie.getImg(), Field.Index.NO);
        addStoredField(document, "des", movie.getDescription(), Field.Index.NO);
        addStoredField(document, "score", movie.getCommscore(), Field.Index.NO);
        return document;
    }

    /** Adds a stored field, skipping null values so one empty column cannot abort the rebuild. */
    private static void addStoredField(Document document, String name, String value, Field.Index index) {
        if (value == null) {
            // Lucene's Field constructor rejects a null value; a skipped field simply reads
            // back as null from Document.get(name).
            return;
        }
        document.add(new Field(name, value, Field.Store.YES, index));
    }
    /**
     * Searches the movie index for {@code str} in the movie name, actor and director fields.
     *
     * <p>The three per-field queries are combined into one boolean query with optional
     * ({@code SHOULD}) clauses and run once. Running them separately into a shared collector,
     * as this method used to, returned the same movie several times when it matched in more
     * than one field.
     *
     * @param url directory that holds the index files
     * @param str query text in Lucene query-parser syntax
     * @return at most 8 matching movies, rebuilt from the stored fields of each hit
     * @throws CorruptIndexException declared by the underlying {@code IndexSearcher} calls
     * @throws IOException if the index cannot be opened or read
     * @throws ParseException if {@code str} is not a valid query
     */
    public List<Movie> searcherLucene(String url,String str) throws CorruptIndexException, IOException, ParseException{
        Analyzer analyzer = new StandardAnalyzer();

        // Parse before opening the index so that a malformed query cannot leave a searcher open.
        BooleanQuery anyField = new BooleanQuery();
        anyField.add(new QueryParser("mname", analyzer).parse(str), BooleanClause.Occur.SHOULD);
        anyField.add(new QueryParser("actor", analyzer).parse(str), BooleanClause.Occur.SHOULD);
        anyField.add(new QueryParser("dir", analyzer).parse(str), BooleanClause.Occur.SHOULD);

        List<Movie> movies = new ArrayList<Movie>();
        IndexSearcher searcher = new IndexSearcher(url);
        try {
            // Keep only the 8 best-scoring hits.
            TopDocCollector collector = new TopDocCollector(8);
            searcher.search(anyField, collector);
            TopDocs topDocs = collector.topDocs();
            ScoreDoc[] hits = topDocs.scoreDocs;
            for (int i = 0; i < hits.length; i++) {
                movies.add(toMovie(searcher.doc(hits[i].doc)));
            }
        } finally {
            searcher.close();
        }
        return movies;
    }

    /** Rebuilds a Movie from the stored fields of an index document. */
    private static Movie toMovie(Document document) {
        Movie movie = new Movie();
        movie.setMovie_id(Integer.parseInt(document.get("mid")));
        movie.setMname(document.get("mname"));
        movie.setActor(document.get("actor"));
        movie.setArea(document.get("area"));
        movie.setDescription(document.get("des"));
        movie.setDirector(document.get("dir"));
        movie.setImg(document.get("img"));
        movie.setPubdate(document.get("pud"));
        movie.setType(document.get("type"));
        movie.setComm_count(Integer.parseInt(document.get("count")));
        movie.setCommscore(document.get("score"));
        return movie;
    }
    /**
     * @param args
     */
    测试
    
     
    
    public static void main(String[] args){
    TestLucene tt=new TestLucene();
    try {
    tt.createLucene("DouBanPrj\\src\\cn\\com\\douban\\movie\\entity\\doc\\lucene");
    } catch (CorruptIndexException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    }
    //List<Movie> list=new ArrayList<Movie>();
    //try {
    //
    //list=tt.searcherLucene("C:\\Documents and Settings\\Administrator\\Workspaces\\MyEclipse 8.x\\DouBanPrj\\src\\cn\\com\\douban\\movie\\entity\\doc\\lucene","张艺谋");
    //for (Movie m : list) {
    //System.out.println(m.getActor()+"@@@"+m.getMname()+"@@@@"+m.getImg());
    //}
    //} catch (CorruptIndexException e) {
    //// TODO Auto-generated catch block
    //e.printStackTrace();
    //} catch (IOException e) {
    //// TODO Auto-generated catch block
    //e.printStackTrace();
    //} catch (ParseException e) {
    //// TODO Auto-generated catch block
    //e.printStackTrace();
    //}
    }
    }
    Timer task that periodically rebuilds the Lucene index (CreateLuceneThread):
    
    package cn.com.douban.movie.lucene;
    
    import java.io.IOException;
    import java.util.Date;
    import java.util.Timer;
    import java.util.TimerTask;
    
    import org.apache.lucene.index.CorruptIndexException;
    
    public class CreateLuceneThread extends TimerTask {
    
    @Override
    public void run() {
    // TODO Auto-generated method stub
    TestLucene tl=new TestLucene();
    try {
    tl.createLucene("E:\\MyEclipse 8.5\\jspworkspace\\DouBanPrj\\src\\cn\\com\\douban\\movie\\entity\\doc\\lucene");
    System.out.println("create again!");
    } catch (CorruptIndexException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    }
    }
    public static void main(String[] args){
    new Timer().scheduleAtFixedRate(new CreateLuceneThread(), new Date(), 5000);
    
    
    }
     
    
    
    
    
    }
  • 相关阅读:
    6 完全平方数相关
    5 三位数,每个位置不同
    Neo4j Admin Import 导入多个node和relationship
    Rust所有权
    Rust 多态
    Rust 泛型
    Rust trait
    Rust模块化
    Spring Cloud(Dalston.SR1)
    git 速度慢问题解决
  • 原文地址:https://www.cnblogs.com/lcuzhanglei/p/2616338.html
Copyright © 2011-2022 走看看