Lucene和ssh框架结合

最近公司在做一个文档管理系统，需要用到Lucene搜索引擎。刚接触这个东西时，我先在网上搜索了一番，但是搜了很久，都没能找到一个能完全整合到我们现有SSH框架中的方案。

先大体说一下，我遇到的问题：

1.包冲突的问题

2.分词类的选择

问题的解决方法：

1.关键包使用以下版本：Hibernate Core 3.3.1.GA，Hibernate Search 3.1.0.GA，Hibernate Annotations 3.2.0.GA，slf4j-api-1.5.6.jar，slf4j-log4j12-1.5.6.jar，log4j-1.2.15.jar，solr-core-1.3.0.jar，Lucene 2.9.1，Spring 2.5，Struts 2.1

2.使用中文分词类得特别注意，一些中文分词类只能识别中文，对英文支持不好，建议使用：je-analysis-1.5.1.jar

现在把一些关键的部分的代码复制如下：

持久对象：

[java] view plain copy

package com.yhwj.docsh.domain;

import java.util.Date;

import java.util.Set;

import javax.persistence.Entity;

import javax.persistence.Id;

import jeasy.analysis.MMAnalyzer;

import org.hibernate.search.annotations.Analyzer;

import org.hibernate.search.annotations.DocumentId;

import org.hibernate.search.annotations.Field;

import org.hibernate.search.annotations.Index;

import org.hibernate.search.annotations.Indexed;

import org.hibernate.search.annotations.Store;

@Entity

@Analyzer(impl = MMAnalyzer.class)

@Indexed(index="Doc")

public class Doc implements Cloneable{

private Long id;

private String docName;

/**

* 文件大小：byte

private Long docSize;

/**

* 文件类型：doc xls txt

private String docType;

/**

* 文件种类：综合类、资源类、维护优化类、资本工程类、人力资源类、网管支撑类、监控应急类、企业发展类、安全类

private String docVariety;

/**

* 上传者id

private User docUpLoader;

/**

* 收藏夹

private Set<Collector> collectors;

/**

* 上传时间

private Date docUpTime;

/**

* 可否下载

private boolean download;

/**

* 可否删除 1为删除 0为未删除

private boolean deleted;

/**

* 关键字

private String keywords;

/**

* 权限 1：向所有用户开放 2：仅向上级领导开放 3：只针对部分选择的用户开放

private int limited;

/**

* 当权限为3时，此处用户列表就是该文件的开放访问列表

private String limitedUser;

/**

* 描述

private String detail;

/**

* 用户名

private String userName;

@Id

@DocumentId

public Long getId() {

return id;

}

public void setId(Long id) {

this.id = id;

}

@Field(name="docName",index=Index.TOKENIZED,store=Store.YES)

public String getDocName() {

return docName;

}

public void setDocName(String docName) {

this.docName = docName;

}

public Long getDocSize() {

return docSize;

}

public void setDocSize(Long docSize) {

this.docSize = docSize;

}

@Field(name="docType",index=Index.TOKENIZED,store=Store.YES)

public String getDocType() {

return docType;

}

public void setDocType(String docType) {

this.docType = docType;

}

@Field(name="docVariety",index=Index.TOKENIZED,store=Store.YES)

public String getDocVariety() {

return docVariety;

}

public void setDocVariety(String docVariety) {

this.docVariety = docVariety;

}

public User getDocUpLoader() {

return docUpLoader;

}

public void setDocUpLoader(User docUpLoader) {

this.docUpLoader = docUpLoader;

}

@Field(name="docUpTime",index=Index.TOKENIZED,store=Store.YES)

public Date getDocUpTime() {

return docUpTime;

}

public void setDocUpTime(Date docUpTime) {

this.docUpTime = docUpTime;

}

public boolean isDownload() {

return download;

}

public void setDownload(boolean download) {

this.download = download;

}

public boolean isDeleted() {

return deleted;

}

public void setDeleted(boolean deleted) {

this.deleted = deleted;

}

@Field(name="keyWords",index=Index.TOKENIZED,store=Store.YES)

public String getKeywords() {

return keywords;

}

public void setKeywords(String keywords) {

this.keywords = keywords;

}

public int getLimited() {

return limited;

}

public void setLimited(int limited) {

this.limited = limited;

}

public String getLimitedUser() {

return limitedUser;

}

public void setLimitedUser(String limitedUser) {

this.limitedUser = limitedUser;

}

public String getDetail() {

return detail;

}

public void setDetail(String detail) {

this.detail = detail;

}

/**

* 注意Lucene中文分词不支持类关联

**/

@Field(name="userName",index=Index.TOKENIZED,store=Store.YES)

public String getUserName() {

return docUpLoader.getUsername();

}

public void setUserName(String userName) {

this.userName = userName;

}

public Set<Collector> getCollectors() {

return collectors;

}

public void setCollectors(Set<Collector> collectors) {

this.collectors = collectors;

}

Hibernate的配置文件：我个人认为这个文件是必须的。网上有很多做法是直接在Spring配置文件里进行配置，但是我尝试了很多次都没有成功，所以最后还是使用了单独的Hibernate配置文件。

[xhtml] view plain copy

<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE hibernate-configuration PUBLIC
    "-//Hibernate/Hibernate Configuration DTD 3.0//EN"
    "http://hibernate.sourceforge.net/hibernate-configuration-3.0.dtd">
<hibernate-configuration>
    <session-factory name="sessionFactory">
        <!-- Base directory for the Lucene indexes. The Java search code in this article opens
             this directory plus "//Doc", matching @Indexed(index="Doc") on the entity. -->
        <property name="hibernate.search.default.indexBase">D://indexDir//file</property>

        <!-- NOTE(review): Hibernate Search 3.x documents the default-analyzer key as
             "hibernate.search.analyzer"; confirm whether this key is still honoured. The Doc
             entity also declares the analyzer itself via @Analyzer. -->
        <property name="hibernate.lucene.analyzer">jeasy.analysis.MMAnalyzer</property>

        <!-- NOTE(review): the <event> and <listener> opening tags were lost from the original
             listing, leaving three orphan </event> closers. They are restored here as the
             standard Hibernate Search index-maintenance listeners; confirm against the
             original configuration. -->
        <event type="post-update">
            <listener class="org.hibernate.search.event.FullTextIndexEventListener"/>
        </event>
        <event type="post-insert">
            <listener class="org.hibernate.search.event.FullTextIndexEventListener"/>
        </event>
        <event type="post-delete">
            <listener class="org.hibernate.search.event.FullTextIndexEventListener"/>
        </event>
    </session-factory>
</hibernate-configuration>

下面是在Spring中加载Hibernate配置文件的代码。注意业务逻辑层需要声明事务：Hibernate只有在事务提交的时候才会去更新索引。

[xhtml] view plain copy

<!-- NOTE(review): the opening <bean> and <property> tags were lost from the original listing.
     They are reconstructed below from the surviving closing tags and values; the bean id, the
     bean class and the name of the first property are assumptions to be confirmed. -->
<bean id="sessionFactory"
      class="org.springframework.orm.hibernate3.annotation.AnnotationSessionFactoryBean">

    <!-- NOTE(review): property name assumed; the list entries were also lost. Presumably this
         listed the annotated entity classes such as com.yhwj.docsh.domain.Doc. -->
    <property name="annotatedClasses">
        <list>
        </list>
    </property>

    <!-- Hibernate settings resolved from external placeholders. -->
    <property name="hibernateProperties">
        <props>
            <prop key="hibernate.dialect">${hibernate.dialect}</prop>
            <prop key="hibernate.show_sql">${hibernate.show_sql}</prop>
            <prop key="hibernate.format_sql">${hibernate.format_sql}</prop>
            <prop key="hibernate.use_sql_comments">${hibernate.use_sql_comments}</prop>
            <prop key="hibernate.hbm2ddl.auto">${hibernate.hbm2ddl.auto}</prop>
        </props>
    </property>

    <!-- Loads the separate Hibernate configuration file that carries the search settings. -->
    <property name="configLocation">
        <value type="org.springframework.core.io.Resource">classpath:hibernate.cfg.xml</value>
    </property>
</bean>

通过Hibernate进行增删改操作时，Hibernate Search会自动帮我们维护索引。下面是使用Lucene搜索文件的代码：

[c-sharp] view plain copy

package com.yhwj.docsh.util;

import java.io.File;

import java.util.ArrayList;

import java.util.List;

import jeasy.analysis.MMAnalyzer;

import org.apache.commons.lang.StringUtils;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.queryParser.MultiFieldQueryParser;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.BooleanClause;

import org.apache.lucene.search.BooleanQuery;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopScoreDocCollector;

import org.apache.lucene.search.BooleanClause.Occur;

import org.apache.lucene.search.highlight.Formatter;

import org.apache.lucene.search.highlight.Highlighter;

import org.apache.lucene.search.highlight.QueryScorer;

import org.apache.lucene.search.highlight.SimpleFragmenter;

import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

import org.apache.oro.text.regex.MalformedPatternException;

import org.apache.oro.text.regex.MatchResult;

import org.apache.oro.text.regex.Pattern;

import org.apache.oro.text.regex.PatternMatcherInput;

import org.apache.oro.text.regex.Perl5Compiler;

import org.apache.oro.text.regex.Perl5Matcher;

import com.yhwj.docsh.data.VFile;

/**

* Lucene解析类

* @author sunguoqiang sunguoqiang@yhwj.com 2010-12-13下午04:05:55

public class LuceneUtil {

private static final String FIELD_DOCNAME = "docName";

private static final String FIELD_USERNAME = "userName";

private static final String FIELD_KEYWORDS = "keyWords";

private static final String FIELD_DOCVARIETY = "docVariety";

private static final String FIELD_DOCTYPE = "docType";

private static final String PATH = "D://indexDir//file//Doc";

public synchronized Analyzer getAnalyzer() {

return new MMAnalyzer();

}

/**

* 搜索

* @param keywords 关键字

* @param docType 文件类型

* @param begin 开始

* @param number 结束用于分页

* @return

* @throws MalformedPatternException

@SuppressWarnings("deprecation")

public List<VFile> searchFile(String keywords,String docType,int begin,int number) throws MalformedPatternException {

List<VFile> vFiles = new ArrayList<VFile>();

File fsDir = new File(PATH);

try {

// 索引查询

IndexReader reader = IndexReader.open(FSDirectory.open(fsDir), true); // only searching, so // read-only=true

IndexSearcher isearcher = new IndexSearcher(reader);

TopScoreDocCollector collector = TopScoreDocCollector.create(100,true);

BooleanQuery booleanQuery = new BooleanQuery();

if(StringUtils.isNotBlank(keywords))

{

/* 下面这个表示要同时搜索这三个域，而且只要一个域里面有满足我们搜索的内容就行 */

BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD};

Query query = MultiFieldQueryParser.parse(keywords, new String[] {FIELD_DOCNAME, FIELD_KEYWORDS,FIELD_USERNAME,FIELD_DOCVARIETY},clauses, getAnalyzer());

booleanQuery.add(query, Occur.SHOULD);

}

QueryParser parser_docType = new QueryParser(Version.LUCENE_29,FIELD_DOCTYPE, getAnalyzer());

if(StringUtils.isEmpty(docType))

{

docType = "all";

}

// query.setBoost(1.0f);

if(docType.equals("all"))

{

Query query_docType = parser_docType.parse("doc xls ppt");// 检索词

booleanQuery.add(query_docType, Occur.SHOULD);

}

else {

Query query_docType = parser_docType.parse(docType);// 检索词

booleanQuery.add(query_docType, Occur.MUST);

}

System.out.println(booleanQuery.toString());

isearcher.search(booleanQuery, collector);

ScoreDoc[] hits = collector.topDocs().scoreDocs;

// 用这个进行高亮显示，默认是..

Formatter formatter = new SimpleHTMLFormatter("", "");

QueryScorer qs = new QueryScorer(booleanQuery);

// 构造高亮

// 指定高亮的格式

// 指定查询评分

Highlighter highlighter = new Highlighter(formatter,qs);

// 这个一般等于你要返回的，高亮的数据长度

// 如果太小，则只有数据的开始部分被解析并高亮，且返回的数据也少

// 太大，有时太浪费了。

highlighter.setTextFragmenter(new SimpleFragmenter(1000));

for (int i = begin; i < hits.length && i < begin + number; i++) {

VFile vFile = new VFile();

Document d = isearcher.doc(hits[i].doc);

vFile.setId(Long.valueOf(d.get("id")));

vFile.setFileName(d.get("docName"));

vFile.setFileVariety(d.get("docVariety"));

vFile.setKeywords(d.get("keyWords"));

vFile.setUpLoader(d.get("userName"));

vFile.setUpTime(d.get("docUpTime"));

vFiles.add(vFile);

String str = highlighter.getBestFragment(getAnalyzer(), FIELD_DOCNAME, d.get(FIELD_DOCNAME));

System.out.println(str);

vFile.setFileName(str);

}

isearcher.close();

} catch (Exception e) {

e.printStackTrace();

}

return vFiles;

}

public static void main(String[] args) throws MalformedPatternException {

String keywords = "你好你好 test 您";

String[] keys = new LuceneUtil().getKeys(keywords);

System.out.println(keys.toString());

}

public String[] getKeys(String keywords) throws MalformedPatternException

{

List<String> list = new ArrayList<String>();

Pattern pattern = new Perl5Compiler().compile("//S+");

Perl5Matcher matcher = new Perl5Matcher();

PatternMatcherInput matcherInput = new PatternMatcherInput(keywords);

while (matcher.contains(matcherInput, pattern)) {

MatchResult result = matcher.getMatch();

list.add(result.group(0));

}

return (String[]) list.toArray(new String[list.size()]);

}

本站仅提供存储服务，所有内容均由用户发布，如发现有害或侵权内容，请点击举报。