Lucene全文检索引擎

发布时间：2023-09-06 01:24　责任编辑：董明明　关键词：暂无标签

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>demo.lucene</groupId>
  <artifactId>Lucene01</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <build/>
  <dependencies>
    <!-- Lucene core library -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>5.3.1</version>
    </dependency>
    <!-- Lucene query parser -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>5.3.1</version>
    </dependency>
    <!-- Lucene common analyzers -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>5.3.1</version>
    </dependency>
  </dependencies>
</project>

import java.io.File;import java.io.FileReader;import java.nio.file.Paths;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.TextField;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;/** * 建立索引的类 * @author Ni Shengwu * */public class Indexer { ???private IndexWriter writer; //写索引实例 ???//构造方法，实例化IndexWriter ???public Indexer(String indexDir) throws Exception { ???????Directory dir = FSDirectory.open(Paths.get(indexDir)); ???????Analyzer analyzer = new StandardAnalyzer(); //标准分词器，会自动去掉空格啊，is a the等单词 ???????IndexWriterConfig config = new IndexWriterConfig(analyzer); //将标准分词器配到写索引的配置中 ???????writer = new IndexWriter(dir, config); //实例化写索引对象 ???} ???//关闭写索引 ???public void close() throws Exception { ???????writer.close(); ???} ???//索引指定目录下的所有文件 ???public int indexAll(String dataDir) throws Exception { ???????File[] files = new File(dataDir).listFiles(); //获取该路径下的所有文件 ???????for(File file : files) { ???????????indexFile(file); //调用下面的indexFile方法，对每个文件进行索引 ???????} ???????return writer.numDocs(); //返回索引的文件数 ???} ???//索引指定的文件 ???private void indexFile(File file) throws Exception { ???????System.out.println("索引文件的路径：" + file.getCanonicalPath()); ???????Document doc = getDocument(file); //获取该文件的document ???????writer.addDocument(doc); //调用下面的getDocument方法，将doc添加到索引中 ???} ???//获取文档，文档里再设置每个字段，就类似于数据库中的一行记录 ???private Document getDocument(File file) throws Exception{ ???????Document doc = new Document(); ???????//添加字段 ???????doc.add(new TextField("contents", new FileReader(file))); //添加内容 ???????doc.add(new TextField("fileName", file.getName(), Field.Store.YES)); //添加文件名，并把这个字段存到索引文件里 ???????doc.add(new TextField("fullPath", file.getCanonicalPath(), Field.Store.YES)); //添加文件路径 
???????return doc; ???} ???public static void main(String[] args) { ???????String indexDir = "D:\\lucene"; //将索引保存到的路径 ???????String dataDir = "D:\\lucene\\data"; //需要索引的文件数据存放的目录 ???????Indexer indexer = null; ???????int indexedNum = 0; ???????long startTime = System.currentTimeMillis(); //记录索引开始时间 ???????try { ???????????indexer = new Indexer(indexDir); ???????????indexedNum = indexer.indexAll(dataDir); ???????} catch (Exception e) { ???????????e.printStackTrace(); ???????} finally { ???????????try { ???????????????indexer.close(); ???????????} catch (Exception e) { ???????????????e.printStackTrace(); ???????????} ???????} ???????long endTime = System.currentTimeMillis(); //记录索引结束时间 ???????System.out.println("索引耗时" + (endTime-startTime) + "毫秒"); ???????System.out.println("共索引了" + indexedNum + "个文件"); ???}}

import java.nio.file.Paths;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;public class Searcher {public static void search(String indexDir, String q) throws Exception { ???????Directory dir = FSDirectory.open(Paths.get(indexDir)); //获取要查询的路径，也就是索引所在的位置 ???????IndexReader reader = DirectoryReader.open(dir); ???????IndexSearcher searcher = new IndexSearcher(reader); ???????Analyzer analyzer = new StandardAnalyzer(); //标准分词器，会自动去掉空格啊，is a the等单词 ???????QueryParser parser = new QueryParser("contents", analyzer); //查询解析器 ???????Query query = parser.parse(q); //通过解析要查询的String，获取查询对象 ???????long startTime = System.currentTimeMillis(); //记录索引开始时间 ???????TopDocs docs = searcher.search(query, 10);//开始查询，查询前10条数据，将记录保存在docs中 ???????long endTime = System.currentTimeMillis(); //记录索引结束时间 ???????System.out.println("匹配" + q + "共耗时" + (endTime-startTime) + "毫秒"); ???????System.out.println("查询到" + docs.totalHits + "条记录"); ???????for(ScoreDoc scoreDoc : docs.scoreDocs) { //取出每条查询结果 ???????????Document doc = searcher.doc(scoreDoc.doc); //scoreDoc.doc相当于docID,根据这个docID来获取文档 ???????????System.out.println(doc.get("fullPath")); //fullPath是刚刚建立索引的时候我们定义的一个字段 ???????} ???????reader.close(); ???} ???public static void main(String[] args) { ???????String indexDir = "D:\\lucene"; ???????String q = "generate-maven-artifacts"; //查询这个字符串 ???????try { ???????????search(indexDir, q); ???????} catch (Exception e) { ???????????e.printStackTrace(); ???????} ???}}

pom.xml

Lucene全文检索引擎

原文地址：http://www.cnblogs.com/Jansens520/p/7813924.html

Lucene全文检索引擎

知识推荐