zoukankan      html  css  js  c++  java
  • IKAnalyzer兼容Lucene 5.4.0版本抛出异常?

    java.lang.AbstractMethodError: org.apache.lucene.analysis.Analyzer.createComponents(Ljava/lang/String;)Lorg/apache/lucene/analysis/Analyzer$TokenStreamComponents;
        at org.apache.lucene.analysis.Analyzer.tokenStream(Analyzer.java:176)
        at org.apache.lucene.document.Field.tokenStream(Field.java:562)
        at org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:607)
        at org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:344)
        at org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:300)
        at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:234)
        at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:450)
        at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1477)
        at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1256)
        at com.study.lucene.demo.IndexFileTester.addDocument4(IndexFileTester.java:120)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
        at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
        at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
        at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
        at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:271)
        at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70)
        at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
        at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238)
        at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63)
        at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236)
        at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53)

    根据报错信息,原来是下载的IK Analyzer 2012FF_hf1.zip不兼容最新的Lucene 5.4.0版本(我下载的Lucene版本是5.4.0),看了下IK Analyzer 2012FF_hf1.zip的源码,原来是其中的IKAnalyzer.java和IKTokenizer.java两个类不兼容Lucene 5.4.0版本的API(这两个文件是IKAnalyzer接入Lucene 的核心类),对其进行相应的改进吧,本来想通过继承方式的,但IKAnalyzer.java和IKTokenizer.java两个类都是final的,没办法,只能重写了。

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.Tokenizer;
    
    /**
     * IK analyzer implementation of the Lucene 5.4.0 {@code Analyzer} API.
     */
    public class ReIKAnalyzer extends Analyzer {
    
    	// Segmentation mode flag handed to every tokenizer this analyzer creates
    	private boolean useSmart;
    
    	/**
    	 * Creates an analyzer that uses the default fine-grained segmentation
    	 * algorithm (smart mode off).
    	 */
    	public ReIKAnalyzer() {
    		this(false);
    	}
    
    	/**
    	 * Creates an analyzer with an explicit segmentation mode.
    	 * 
    	 * @param useSmart
    	 *            when true, the tokenizer performs smart segmentation
    	 */
    	public ReIKAnalyzer(boolean useSmart) {
    		this.useSmart = useSmart;
    	}
    
    	/**
    	 * @return the current value of the smart-segmentation flag
    	 */
    	public boolean useSmart() {
    		return useSmart;
    	}
    
    	/**
    	 * @param useSmart
    	 *            new value of the smart-segmentation flag
    	 */
    	public void setUseSmart(boolean useSmart) {
    		this.useSmart = useSmart;
    	}
    
    	/**
    	 * Builds the token stream components for a field: a single
    	 * {@link ReIKTokenizer} configured with the current segmentation mode.
    	 * 
    	 * @param fieldName
    	 *            the name of the fields content passed to the
    	 *            TokenStreamComponents sink as a reader
    	 * @return components wrapping a newly created {@link ReIKTokenizer}
    	 */
    	@Override
    	protected TokenStreamComponents createComponents(String fieldName) {
    		return new TokenStreamComponents(new ReIKTokenizer(useSmart()));
    	}
    }
    

      

    import java.io.IOException;
    
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
    import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
    import org.wltea.analyzer.core.IKSegmenter;
    import org.wltea.analyzer.core.Lexeme;
    
    /**
     * Adapter that exposes the IK segmenter as a Lucene 5.4.0 {@code Tokenizer}.
     */
    public class ReIKTokenizer extends Tokenizer {
    
    	// IK segmenter implementation that produces the lexemes
    	private final IKSegmenter segmenter;
    
    	// Term text attribute
    	private final CharTermAttribute termAtt;
    	// Term offset attribute
    	private final OffsetAttribute offsetAtt;
    	// Term type attribute (see the type constants in
    	// org.wltea.analyzer.core.Lexeme)
    	private final TypeAttribute typeAtt;
    	// End position of the last lexeme emitted since the last reset()
    	private int endPosition;
    
    	/**
    	 * Creates the tokenizer and registers the attributes it populates.
    	 * 
    	 * @param useSmart
    	 *            segmentation mode flag passed through to the IK segmenter
    	 */
    	public ReIKTokenizer(boolean useSmart) {
    		super();
    		offsetAtt = addAttribute(OffsetAttribute.class);
    		termAtt = addAttribute(CharTermAttribute.class);
    		typeAtt = addAttribute(TypeAttribute.class);
    		// The segmenter is re-pointed at the current input on every reset().
    		segmenter = new IKSegmenter(input, useSmart);
    	}
    
    	/**
    	 * Advances to the next lexeme and copies it into the Lucene attributes.
    	 * 
    	 * @return true if a token was produced, false when the segmenter has no
    	 *         more lexemes
    	 * @throws IOException
    	 *             if the segmenter fails while reading the input
    	 */
    	@Override
    	public boolean incrementToken() throws IOException {
    		// Clear all attributes left over from the previous token
    		clearAttributes();
    		Lexeme nextLexeme = segmenter.next();
    		if (nextLexeme == null) {
    			// No more lexemes: signal end of stream
    			return false;
    		}
    		// Convert the lexeme into attributes: term text and length
    		termAtt.append(nextLexeme.getLexemeText());
    		termAtt.setLength(nextLexeme.getLength());
    		// Term offsets
    		offsetAtt.setOffset(nextLexeme.getBeginPosition(),
    				nextLexeme.getEndPosition());
    		// Remember where the last token ended; end() reports it
    		endPosition = nextLexeme.getEndPosition();
    		// Term type
    		typeAtt.setType(nextLexeme.getLexemeTypeString());
    		return true;
    	}
    
    	/**
    	 * Prepares the tokenizer for a new input.
    	 * 
    	 * @throws IOException
    	 *             if the superclass reset fails
    	 */
    	@Override
    	public void reset() throws IOException {
    		super.reset();
    		segmenter.reset(input);
    		// Forget the previous input's end position so that an input yielding
    		// no tokens does not report a stale final offset from end().
    		endPosition = 0;
    	}
    
    	/**
    	 * Sets the final offset after the last token has been consumed.
    	 * 
    	 * @throws IOException
    	 *             if the superclass end-of-stream handling fails
    	 */
    	@Override
    	public final void end() throws IOException {
    		// The TokenStream contract requires overriders to call super.end()
    		// before setting their own end-of-stream attribute values.
    		super.end();
    		// set final offset
    		int finalOffset = correctOffset(this.endPosition);
    		offsetAtt.setOffset(finalOffset, finalOffset);
    	}
    }
    

      

  • 相关阅读:
    算法导论(1)堆排序
    Opencv--HoughCircles源码剖析
    数据结构算法应用C++语言描述——(1)C++基础知识
    Java编程的23种设计模式
    团队建设
    管理方法论和角色认知
    压力测试:怎样设计全链路压力测试平台
    09-数据库优化方案(二):写入数据量增加时,如何实现分库分表
    08-数据库优化方案(一):查询请求增加时,如何做主从分离
    07-池化技术:如何减少频繁创建数据库连接的性能损耗
  • 原文地址:https://www.cnblogs.com/fan-yuan/p/9228838.html
Copyright © 2011-2022 走看看