package com.zte.adc.search.service.impl;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.LockObtainFailedException;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;
import com.zte.adc.search.dao.SearchStateBeanDAO;
import com.zte.adc.search.dao.impl.IndexBeanDAOImpl;
import com.zte.adc.search.dao.impl.SearchStateBeanDAOImpl;
import com.zte.adc.search.entity.IndexBean;
import com.zte.adc.search.entity.SearchStateBean;
import com.zte.adc.search.entity.XmlBean;
import com.zte.adc.search.service.IndexService;
import com.zte.adc.search.service.XmlParserService;
import com.zte.adc.search.utils.PageCondition;
import com.zte.adc.search.utils.PageDataSet;
import com.zte.adc.search.utils.PropertyManager;
import com.zte.adc.search.utils.SearchStateEnum;
import com.zte.adc.search.utils.StringTools;
/**
 * Lucene-backed implementation of {@link IndexService}.
 *
 * <p>Builds a full-text index from the tables described by the XML
 * configuration ({@link XmlBean}), keeps it in step with the database by
 * replaying pending {@link SearchStateBean} rows, and serves keyword searches
 * (plain and paginated) with highlighted content fragments.
 *
 * <p>Instances hold mutable state ({@code xmlService}, {@code beans},
 * {@code indexDir}) and perform no synchronization of their own.
 */
public class IndexServiceImpl implements IndexService {

    private static final Log log = LogFactory.getLog(IndexServiceImpl.class);

    /** Pattern used to parse the stored "indexdate" field back into a date. */
    private static final String INDEX_DATE_PATTERN = "yyyy-MM-dd";

    /** Highlight fragment length used when no size is configured. */
    private static final int DEFAULT_FRAGMENT_SIZE = 300;

    /** Markup wrapped around every highlighted term. */
    private static final String HIGHLIGHT_PRE = "<b><font color='red'>";
    private static final String HIGHLIGHT_POST = "</font></b>";

    // Index fields matched when locating the document(s) of a database row.
    // NOTE(review): callers pass getTargetId() as the value for "id", while
    // documents also carry a separate "targetid" field - confirm that "id"
    // is really the intended field to match on.
    private String[] ids = { "id", "type" };

    // Both fields above are mandatory for a match.
    private Occur[] occur = { Occur.MUST, Occur.MUST };

    // Index directory, read from the "index.file.path" property.
    private String indexDir = PropertyManager
            .getPropertyByName("index.file.path");

    // XML configuration reader; (re)created by createIndex() and insert().
    private XmlParserService xmlService = null;

    // Table descriptions loaded by the last createIndex() run.
    private List<XmlBean> beans = new ArrayList<XmlBean>();

    // DAO that reads index rows from the database.
    private IndexBeanDAOImpl dao = new IndexBeanDAOImpl();

    // DAO for the table of pending row-state changes.
    private SearchStateBeanDAO sdao = new SearchStateBeanDAOImpl();

    /**
     * Builds the index from scratch unless {@link #ifIndexExist()} reports
     * that one is already present. I/O failures are logged, not thrown.
     */
    public void createIndex() {
        log.warn("开始创建索引");
        if (ifIndexExist()) {
            return;
        }
        File file = new File(indexDir);
        if (!file.exists()) {
            log.warn("索引目录不存,创建索引目录");
            // mkdirs() so that a nested, not-yet-existing path also works.
            file.mkdirs();
        }
        xmlService = new SAXXmlParserServiceImpl();
        beans = xmlService.getXmlBean();
        // Wait (practically) forever for the write lock instead of failing.
        IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(file, new IKAnalyzer());
            writeIndexBean(beans, writer);
            log.warn("创建索引成功");
        } catch (IOException e) {
            // Also covers CorruptIndexException / LockObtainFailedException.
            log.error("创建索引失败,file不能写入或读取或不存在,不能创建索引", e);
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Removes every document from the existing index and optimizes it.
     * I/O failures are logged, not thrown.
     */
    public void deleteAllIndex() {
        log.warn("开始删除所有索引");
        IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(indexDir, new IKAnalyzer(), false);
            writer.deleteAll();
            writer.optimize();
            log.warn("删除索引成功");
        } catch (IOException e) {
            log.error("删除索引失败,错误的indexDir,不能创建indexWriter", e);
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Queries the database for every given table description and collects
     * the resulting index rows.
     *
     * @param xmlBean table descriptions to query
     * @return all rows returned by the DAO, in description order
     */
    public List<IndexBean> getIndexBean(List<XmlBean> xmlBean) {
        List<IndexBean> result = new ArrayList<IndexBean>();
        for (XmlBean bean : xmlBean) {
            String sql = this.getSqlByXmlBean(bean);
            List<IndexBean> rows = dao.getIndexBean(sql, null, bean);
            if (rows != null) {
                result.addAll(rows);
            }
        }
        return result;
    }

    /**
     * Builds the select statement of every given table description and hands
     * it, together with the writer, to the DAO.
     *
     * @param xmlBean table descriptions to index
     * @param writer  open index writer passed through to the DAO
     */
    public void writeIndexBean(List<XmlBean> xmlBean, IndexWriter writer) {
        for (XmlBean bean : xmlBean) {
            String sql = this.getSqlByXmlBean(bean);
            log.warn("根据xmlBean得到sql语句: " + sql);
            dao.writeIndexBean(sql, null, bean, writer);
        }
    }

    /**
     * Returns the page size configured under "index.pageSize".
     *
     * @throws NumberFormatException if the property is missing or not an
     *                               integer
     */
    public int getPageSize() {
        return Integer.parseInt(PropertyManager
                .getPropertyByName("index.pageSize"));
    }

    /**
     * Reports whether the index directory looks populated, creating the
     * directory when it is missing.
     *
     * @return {@code true} when the directory holds at least three entries
     */
    @Override
    public boolean ifIndexExist() {
        File directory = new File(indexDir);
        if (!directory.exists()) {
            directory.mkdirs();
        }
        // listFiles() yields null when the path is not a readable directory.
        File[] entries = directory.listFiles();
        // NOTE(review): "three or more entries" is a heuristic for "an index
        // has been written" - confirm it suits the index format in use.
        return entries != null && entries.length >= 3;
    }

    /**
     * Searches the "content" field for the keyword and returns every hit,
     * creating the index first when it does not exist yet.
     *
     * @param keyWord     search keyword; {@code null} is treated as ""
     * @param currentPage not used by this method; kept for callers
     * @return matching beans, empty when nothing matches or the search fails
     */
    public List<IndexBean> searchIndex(String keyWord, int currentPage) {
        log.warn("开始查询索引");
        List<IndexBean> results = new ArrayList<IndexBean>();
        if (!ifIndexExist()) {
            createIndex();
            log.warn("索引不存时创建索引");
        }
        if (null == keyWord) {
            keyWord = "";
        }
        IndexSearcher searcher = null;
        try {
            log.warn("根据关键字检索索引文件");
            searcher = new IndexSearcher(this.getIndexDir());
            searcher.setSimilarity(new IKSimilarity());
            Query query = IKQueryParser.parse("content", keyWord);
            Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(
                    HIGHLIGHT_PRE, HIGHLIGHT_POST), new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(
                    DEFAULT_FRAGMENT_SIZE));
            // Lucene rejects a hit limit of 0, so skip the search when the
            // index holds no documents.
            int maxHits = this.getIndexCount();
            if (maxHits > 0) {
                TopDocs hits = searcher.search(query, maxHits);
                SimpleDateFormat sdf = new SimpleDateFormat(INDEX_DATE_PATTERN);
                for (ScoreDoc hit : hits.scoreDocs) {
                    results.add(toIndexBean(searcher.doc(hit.doc),
                            highlighter, sdf, false));
                }
            }
        } catch (CorruptIndexException e) {
            log.error("检索失败,检索目录出错", e);
        } catch (IOException e) {
            log.error("检索失败,解析关键字出错", e);
        } finally {
            closeSearcher(searcher);
        }
        return results;
    }

    /**
     * Searches the "content" field for the keyword, newest "indexdate"
     * first, and returns the requested page.
     *
     * @param keyWord search keyword; {@code null} is treated as ""
     * @param page    requested page number and page size; must not be null
     * @return page data; only the page size is set when no index exists or
     *         the search fails
     */
    public PageDataSet searchIndex(String keyWord, PageCondition page) {
        log.warn("开始分页查询索引");
        PageDataSet pds = new PageDataSet();
        pds.setPageSize(page.getPageSize());
        List<IndexBean> pageBeans = new ArrayList<IndexBean>();
        if (!this.ifIndexExist()) {
            return pds;
        }
        IndexSearcher searcher = null;
        try {
            log.warn("根据关键字取得索引并按每页记录数取得每页数据");
            searcher = new IndexSearcher(getIndexDir());
            searcher.setSimilarity(new IKSimilarity());
            if (null == keyWord) {
                keyWord = "";
            }
            Query query = IKQueryParser.parse("content", keyWord);
            Filter filter = new QueryFilter(query);
            Sort sort = new Sort(new SortField("indexdate", SortField.STRING,
                    true));
            Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(
                    HIGHLIGHT_PRE, HIGHLIGHT_POST), new QueryScorer(query));
            // Fragment length comes from "index.contentSize" when configured.
            String configuredSize = PropertyManager
                    .getPropertyByName("index.contentSize");
            int fragmentSize = configuredSize == null ? DEFAULT_FRAGMENT_SIZE
                    : Integer.parseInt(configuredSize);
            highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));

            // Lucene rejects a hit limit of 0; treat an empty index as
            // "no hits" instead of letting the search throw.
            int maxHits = this.getIndexCount();
            ScoreDoc[] docArray = maxHits > 0 ? searcher.search(query, filter,
                    maxHits, sort).scoreDocs : new ScoreDoc[0];

            int total = docArray.length;
            pds.setTotalCount(total);
            pds.setCurrentPage(page.getCurrentPage());

            // Page window [startIndex, endIndex), clamped to the hit array so
            // that a page number below 1 or past the end cannot overrun it.
            int startIndex = (page.getCurrentPage() - 1) * page.getPageSize();
            if (startIndex < 0) {
                startIndex = 0;
            }
            int endIndex = startIndex + page.getPageSize();
            if (endIndex > total) {
                endIndex = total;
            }

            SimpleDateFormat sdf = new SimpleDateFormat(INDEX_DATE_PATTERN);
            for (int i = startIndex; i < endIndex; i++) {
                pageBeans.add(toIndexBean(searcher.doc(docArray[i].doc),
                        highlighter, sdf, true));
            }
            pds.setDataset(pageBeans);
        } catch (CorruptIndexException e) {
            log.error("分页检索失败,检索目录出错", e);
        } catch (IOException e) {
            log.error("分页检索失败", e);
        } finally {
            closeSearcher(searcher);
        }
        return pds;
    }

    /**
     * Replays every pending row-state change (insert / update / delete)
     * against the index and removes each handled state row. Rows with an
     * unknown state are logged and left in place.
     */
    public void updateIndex() {
        File file = new File(indexDir);
        if (!file.exists()) {
            file.mkdirs();
            createIndex();
        }
        List<SearchStateBean> pending = sdao.getUpdateInfo();
        if (pending == null) {
            return;
        }
        for (SearchStateBean b : pending) {
            boolean handled = true;
            switch (b.getState()) {
            case SearchStateEnum.INSERT:
                insert(b);
                break;
            case SearchStateEnum.UPDATE:
                update(b);
                break;
            case SearchStateEnum.DELETE:
                delete(b);
                break;
            default:
                log.error(b.getId() + "状态出现问题 请更正");
                handled = false;
            }
            // NOTE(review): insert/update/delete log their failures instead
            // of reporting them, so the state row is removed even when the
            // index operation failed.
            if (handled) {
                sdao.delete(b);
            }
        }
    }

    /**
     * Builds the select statement for one table description: primary key,
     * content columns, title columns not already listed as content, create
     * time and (when configured) update time, followed by the configured
     * conditions joined with "and".
     *
     * @param bean table description
     * @return the SQL text, always containing "where 1=1"
     */
    public String getSqlByXmlBean(XmlBean bean) {
        StringBuilder sql = new StringBuilder("select ");
        sql.append(bean.getPrimaryKeyName()).append(',');
        for (String field : bean.getContents()) {
            sql.append(field).append(',');
        }
        // A column may be configured as both title and content; select it
        // only once.
        for (String field : bean.getTitles()) {
            if (!isContentColumn(bean, field)) {
                sql.append(field).append(',');
            }
        }
        sql.append(bean.getCreateTime()).append(',');
        if (bean.getUpdateTime() != null && !bean.getUpdateTime().equals("")) {
            sql.append(bean.getUpdateTime()).append(',');
        }
        // Drop the trailing comma left by the column list.
        sql.setLength(sql.length() - 1);
        sql.append(" from ").append(bean.getTable()).append(" where 1=1");
        for (String condition : bean.getConditions()) {
            if (condition != null && !condition.trim().equals("")) {
                sql.append(" and ").append(condition);
            }
        }
        return sql.toString();
    }

    /** Tells whether the column is one of the bean's content columns. */
    private boolean isContentColumn(XmlBean bean, String column) {
        for (String content : bean.getContents()) {
            if (column.equals(content)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Counts the indexed documents matching the bean's target id and type.
     *
     * @param bean bean whose target id and type are matched
     * @return number of matches, 0 when the lookup fails
     */
    public int getIndexCountByCondition(IndexBean bean) {
        int count = 0;
        IndexSearcher searcher = null;
        try {
            Query query = IKQueryParser.parseMultiField(ids, new String[] {
                    bean.getTargetId(), bean.getType() }, occur);
            searcher = new IndexSearcher(this.indexDir);
            count = searcher.search(query).length();
        } catch (IOException e) {
            log.error("取出索引个数错误:", e);
        } finally {
            closeSearcher(searcher);
        }
        return count;
    }

    /**
     * Adds one document per bean that is not yet present in the index
     * according to {@link #getIndexCountByCondition(IndexBean)}. Null bean
     * properties are stored as empty strings.
     *
     * @param list   beans to index
     * @param writer open index writer
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           on a low-level I/O error
     */
    public void addDocument(List<IndexBean> list, IndexWriter writer)
            throws CorruptIndexException, IOException {
        log.warn("开始写入索引字段");
        for (IndexBean bean : list) {
            // The presence check opens its own searcher on the index
            // directory, separate from the writer passed in.
            if (getIndexCountByCondition(bean) != 0) {
                continue;
            }
            Document document = new Document();
            document.add(analyzedField("targetid", bean.getTargetId()));
            document.add(analyzedField("id", bean.getIndexId()));
            document.add(analyzedField("title", bean.getTitle()));
            document.add(analyzedField("content", bean.getContent()));
            document.add(analyzedField("type", bean.getType()));
            document.add(analyzedField("url", bean.getUrl()));
            document.add(analyzedField("clickcount", String.valueOf(bean
                    .getClickCount())));
            // Stored verbatim (not analyzed) because searches sort on it.
            // NOTE(review): the search methods parse this value with the
            // pattern yyyy-MM-dd - confirm getIndexDate().toString() yields
            // a compatible format.
            document.add(new Field("indexdate",
                    bean.getIndexDate() == null ? "" : bean.getIndexDate()
                            .toString(), Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            document.add(analyzedField("description", bean.getDescription()));
            writer.addDocument(document);
        }
    }

    /** Creates a stored, analyzed field; a null value becomes "". */
    private Field analyzedField(String name, String value) {
        return new Field(name, value == null ? "" : value, Field.Store.YES,
                Field.Index.ANALYZED);
    }

    /**
     * Queries the database for a single table description.
     *
     * @param xmlBean table description
     * @return the first row found, or {@code null} when there is none
     */
    public IndexBean getIndexBean(XmlBean xmlBean) {
        List<XmlBean> single = new ArrayList<XmlBean>();
        single.add(xmlBean);
        List<IndexBean> rows = this.getIndexBean(single);
        return rows.isEmpty() ? null : rows.get(0);
    }

    /**
     * Applies a row update: deletes the row's current document(s), then
     * indexes the row again.
     *
     * @param bean pending state row
     */
    public void update(SearchStateBean bean) {
        log.warn("先删除已更改的索引");
        deleteIndex(ids, new String[] { bean.getTargetId(),
                bean.getTargetTable() }, occur);
        log.warn("再加入更改后的索引");
        insert(bean);
    }

    /**
     * Applies a row insert: reads the row from the database and adds it to
     * the index. I/O failures are logged, not thrown.
     *
     * @param bean pending state row
     */
    public void insert(SearchStateBean bean) {
        log.warn("开始同步数据");
        xmlService = new SAXXmlParserServiceImpl();
        XmlBean xmlBean = xmlService.getXmlBeanByType(bean.getTargetTable());
        // NOTE(review): the target id is concatenated into the SQL text;
        // bind it as a parameter if the DAO supports one.
        String sql = this.getSqlByXmlBean(xmlBean) + " and "
                + xmlBean.getPrimaryKeyName() + "='" + bean.getTargetId() + "'";
        log.warn("需要更新的sql语句: " + sql);
        List<IndexBean> indexBeans = dao.getIndexBean(sql, null, xmlBean);
        IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(indexDir, new IKAnalyzer(), false);
            addDocument(indexBeans, writer);
            log.warn("同步数据成功,关闭writer");
        } catch (IOException e) {
            log.error("同步数据失败", e);
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Applies a row delete: removes the row's document(s) from the index.
     *
     * @param bean pending state row
     */
    public void delete(SearchStateBean bean) {
        this.deleteIndex(ids, new String[] { bean.getTargetId(),
                bean.getTargetTable() }, occur);
    }

    /**
     * Deletes every document matching the field/value pairs and optimizes
     * the index. I/O failures are logged, not thrown.
     *
     * @param fields field names to match
     * @param values value for each field, in the same order
     * @param occur  occurrence requirement for each field
     */
    public void deleteIndex(String[] fields, String[] values, Occur[] occur) {
        log.warn("开始根据条件删除索引");
        IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(indexDir, new IKAnalyzer(), false);
            Query query = IKQueryParser.parseMultiField(fields, values, occur);
            writer.deleteDocuments(query);
            writer.optimize();
            log.warn("删除索引成功,关闭writer");
        } catch (IOException e) {
            log.error("删除索引失败", e);
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Returns the best highlighted fragment of the value.
     *
     * @param field       field name used to tokenize the value
     * @param value       text to highlight; may be null
     * @param highlighter configured highlighter
     * @return the fragment, or {@code null} when the value is null,
     *         highlighting fails or the highlighter yields nothing
     */
    public String getHigherStr(String field, String value,
            Highlighter highlighter) {
        if (value == null) {
            return null;
        }
        TokenStream stream = new IKAnalyzer().tokenStream(field,
                new StringReader(value));
        String fragment = null;
        try {
            fragment = highlighter.getBestFragment(stream, value);
        } catch (IOException e) {
            log.error("获得高亮字段失败", e);
        }
        return fragment;
    }

    /** Returns the index directory path. */
    public String getIndexDir() {
        return indexDir;
    }

    /** Sets the index directory path. */
    public void setIndexDir(String indexDir) {
        this.indexDir = indexDir;
    }

    /**
     * Sums the row counts of the tables loaded by the last
     * {@link #createIndex()} run on this instance; 0 when none were loaded.
     */
    public int getDataCount() {
        String sql = "select count(*) from ";
        int count = 0;
        for (XmlBean bean : beans) {
            count = count + dao.getUniqueValue(sql + bean.getTable());
        }
        return count;
    }

    /**
     * Returns the number of documents in the index.
     *
     * @return document count, 0 when no index exists or it cannot be read
     */
    public int getIndexCount() {
        int count = 0;
        if (ifIndexExist()) {
            IndexReader reader = null;
            try {
                reader = IndexReader.open(this.indexDir);
                count = reader.numDocs();
            } catch (IOException e) {
                log.error("获得当前索引数量失败", e);
            } finally {
                // Close in finally so a failing numDocs() cannot leak it.
                if (reader != null) {
                    try {
                        reader.close();
                    } catch (IOException e) {
                        log.error(e.toString(), e);
                    }
                }
            }
        }
        return count;
    }

    /**
     * Converts a stored document into an {@link IndexBean}. Missing stored
     * fields become null properties rather than failing the whole search.
     *
     * @param stripHtml when true the content is passed through
     *                  {@code StringTools.getHtmlContent} before highlighting
     */
    private IndexBean toIndexBean(Document document, Highlighter highlighter,
            SimpleDateFormat sdf, boolean stripHtml) {
        IndexBean bean = new IndexBean();
        bean.setIndexId(document.get("id"));
        bean.setTitle(document.get("title"));
        bean.setContent(document.get("content"));
        String highlightSource = bean.getContent();
        if (stripHtml && highlightSource != null) {
            highlightSource = StringTools.getHtmlContent(highlightSource);
        }
        bean.setHigherContent(getHigherStr("content", highlightSource,
                highlighter));
        // The target id must be set before the URL is built from it.
        bean.setTargetId(document.get("targetid"));
        bean.setType(document.get("type"));
        String urlPrefix = PropertyManager.getPropertyByName(bean.getType(),
                "urlConfig");
        bean.setUrl(urlPrefix == null ? null : urlPrefix + bean.getTargetId());
        bean.setClickCount(parseClickCount(document.get("clickcount")));
        bean.setDescription(document.get("description"));
        bean.setIndexDate(parseIndexDate(document.get("indexdate"), sdf));
        return bean;
    }

    /** Parses the stored click count; missing or malformed values give 0. */
    private int parseClickCount(String raw) {
        if (raw == null) {
            return 0;
        }
        try {
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            log.error("点击数解析失败: " + raw, e);
            return 0;
        }
    }

    /**
     * Parses the stored index date; a missing, empty or unparseable value
     * falls back to the current time so one bad document cannot abort the
     * search.
     */
    private Date parseIndexDate(String raw, SimpleDateFormat sdf) {
        if (raw == null || "".equals(raw)) {
            return new Date();
        }
        try {
            return sdf.parse(raw);
        } catch (java.text.ParseException e) {
            log.error("时间格式化错误", e);
            return new Date();
        }
    }

    /** Closes the writer if it was opened; a close failure is only logged. */
    private void closeWriter(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            log.error(e.toString(), e);
        }
    }

    /** Closes the searcher if it was opened; a close failure is only logged. */
    private void closeSearcher(IndexSearcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (IOException e) {
            log.error(e.toString(), e);
        }
    }
}