Lucene教程(二) 搜索初步

搜索可分为如下几步:

  1. 创建Directory
  2. 创建IndexReader    
  3. 根据IndexReader创建IndexSearch        
  4. 创建搜索的Query 
  5. 根据searcher搜索并且返回TopDocs  
  6. 根据TopDocs获取ScoreDoc对象        
  7. 根据searcher和ScoreDoc对象获取具体的Document对象          
  8. 根据Document对象获取需要的值
下面是例子代码:
3.5版本:
3.5版本比较简单,只需要Lucene核心包lucene-core即可,pom文件如下所示:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.darren.lucene35.helloworld</groupId>
    <artifactId>lucene35_helloworld</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>lucene35_helloworld</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <lucene.version>3.5.0</lucene.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
    </dependencies>
</project>
例子代码如下:

package com.darren.lucene35;

import java.io.File;
import java.io.FileReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class HelloLucene {
    /**
     * 搜索
     */
    public void search() {
        IndexReader indexReader = null;
        try {
            // 1、创建Directory
            Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));
            // 2、创建IndexReader
            indexReader = IndexReader.open(directory);
            // 3、根据IndexReader创建IndexSearch
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            // 4、创建搜索的Query
            // 使用默认的标准分词器
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);

            // 在content中搜索Darren
            // 创建parser来确定要搜索文件的内容,第二个参数为搜索的域
            QueryParser queryParser = new QueryParser(Version.LUCENE_35, "content", analyzer);
            // 创建Query表示搜索域为content包含Darren的文档
            Query query = queryParser.parse("Darren");

            // 5、根据searcher搜索并且返回TopDocs
            TopDocs topDocs = indexSearcher.search(query, 10);
            // 6、根据TopDocs获取ScoreDoc对象
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (ScoreDoc scoreDoc : scoreDocs) {
                // 7、根据searcher和ScoreDoc对象获取具体的Document对象
                Document document = indexSearcher.doc(scoreDoc.doc);
                // 8、根据Document对象获取需要的值
                System.out.println(document.get("filename") + " " + document.get("filepath"));
            }

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (indexReader != null) {
                    indexReader.clone();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

    }
}

4.5版本:
4.5版本需要Lucene核心包lucene-core和查询包lucene-queryparser,从4.0版本之后分词包从核心包分离,pom文件如下所示:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.darren.lucene45.helloworld</groupId>
    <artifactId>lucene45_helloworld</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>lucene45_helloworld</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <lucene.version>4.5.1</lucene.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
    </dependencies>
</project>
例子代码如下:
package com.darren.lucene45;

import java.io.File;
import java.io.FileReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class HelloLucene {
    /**
     * 搜索
     */
    public void search() {
        DirectoryReader directoryReader = null;
        try {
            // 1、创建Directory
            Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));
            // 2、创建IndexReader
            /**
             * 注意Reader与3.5版本不同:
             * 
             * 所以使用DirectoryReader
             * 
             * @Deprecated public static DirectoryReader open(final Directory directory) throws IOException { return
             *             DirectoryReader.open(directory); }
             */
            // 如下方法过时
            // IndexReader indexReader = IndexReader.open(directory);
            directoryReader = DirectoryReader.open(directory);
            // 3、根据IndexReader创建IndexSearch

            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

            // 4、创建搜索的Query
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
            /**
             * 注意与3.5版本不同:
             * 
             * 需要引入lucene-queryparser包,因为从4.0版本后lucene-queryparser包从核心包分离
             */
            // 创建parser来确定要搜索文件的内容,第二个参数为搜索的域
            QueryParser queryParser = new QueryParser(Version.LUCENE_45, "content", analyzer);

            // 创建Query表示搜索域为content包含Darren的文档
            Query query = queryParser.parse("Darren");

            // 5、根据searcher搜索并且返回TopDocs
            TopDocs topDocs = indexSearcher.search(query, 10);

            // 6、根据TopDocs获取ScoreDoc对象
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (ScoreDoc scoreDoc : scoreDocs) {

                // 7、根据searcher和ScoreDoc对象获取具体的Document对象
                Document document = directoryReader.document(scoreDoc.doc);

                // 8、根据Document对象获取需要的值
                System.out.println(document.get("filename") + " " + document.get("filepath"));
            }

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (directoryReader != null) {
                    directoryReader.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
5.0版本:
5.0版本和4.5版本一样,pom文件如下所示:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.darren.lucene50.helloworld</groupId>
    <artifactId>lucene50_helloworld</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>lucene50_helloworld</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <lucene.version>5.0.0</lucene.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
    </dependencies>
</project>
例子代码如下:
package com.darren.lucene50;

import java.io.File;
import java.io.FileReader;
import java.nio.file.FileSystems;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class HelloLucene {
    /**
     * 搜索
     */
    public void search() {
        DirectoryReader directoryReader = null;
        try {
            // 1、创建Directory
            Directory directory = FSDirectory.open(FileSystems.getDefault().getPath("F:/test/lucene/index"));
            // 2、创建IndexReader
            /**
             * 注意Reader与3.5版本不同:
             * 
             * 所以使用DirectoryReader
             * 
             * @Deprecated public static DirectoryReader open(final Directory directory) throws IOException { return
             *             DirectoryReader.open(directory); }
             * 
             *             但是和4.5版本相同
             */
            // 如下方法过时
            // IndexReader indexReader = IndexReader.open(directory);
            directoryReader = DirectoryReader.open(directory);

            // 3、根据IndexReader创建IndexSearch
            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

            // 4、创建搜索的Query
            /**
             * 注意StandardAnalyzer与3.5版本4.5版本不同:
             * 
             * 不需要版本号
             */
            Analyzer analyzer = new StandardAnalyzer();
            // 创建parser来确定要搜索文件的内容,第一个参数为搜索的域
            /**
             * 注意QueryParser与3.5版本4.5版本不同:
             * 
             * 不需要版本号
             */
            QueryParser queryParser = new QueryParser("content", analyzer);
            // 创建Query表示搜索域为content包含Darren的文档
            Query query = queryParser.parse("Darren");

            // 5、根据searcher搜索并且返回TopDocs
            TopDocs topDocs = indexSearcher.search(query, 10);

            // 6、根据TopDocs获取ScoreDoc对象
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (ScoreDoc scoreDoc : scoreDocs) {

                // 7、根据searcher和ScoreDoc对象获取具体的Document对象
                Document document = indexSearcher.doc(scoreDoc.doc);

                // 8、根据Document对象获取需要的值
                System.out.println(document.get("filename") + " " + document.get("filepath"));
            }

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (directoryReader != null) {
                    directoryReader.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
测试代码:
package com.darren.lucene50;

import org.junit.Test;

public class HelloLuceneTest {

    @Test
    public void testSearch() {
        HelloLucene helloLucene = new HelloLucene();
        helloLucene.search();
    }
}

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。