ES文档搜索

框架学习

发布时间 : 2024-05-16 15:59

字数:2.1k 阅读 :

ES文档搜索
1. 使用Java ES客户端

ES文档搜索

ElasticSearch与Kibana安装以及ik分词器安装请看之前的文章，这里就不做过多赘述。

ES语言命名规范

只能使用小写字母，不能以下划线开头

索引库名称，包括属性名称都用小写字母加下划线的方式命名

创建索引库

需要提前安装ik分词器

创建索引库，包含两个属性，文件名称和文本内容，使用ik_smart分词

PUT /text_file_doc
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "file_name":{
        "type": "text", 
        "analyzer": "ik_smart"
      },
      "content":{
          "type": "text",
        "analyzer": "ik_smart"
      }
    }
  }
}

使用Java ES客户端

<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>7.6.1</version>
</dependency>

<dependency>
    <groupId>com.fasterxml.jackson.core</groupId>
    <artifactId>jackson-databind</artifactId>
    <version>2.9.1</version>
</dependency>

<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-core</artifactId>
    <version>2.10.0</version>
</dependency>

第一个是ES的Java客户端，第二个是用于封装JSON字符串的Jackson依赖，第三个是日志依赖log4j2

需要将日志配置文件（log4j2.xml）添加到resources目录下

<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN">
    <Appenders>
        <Console name="Console" target="SYSTEM_OUT">
            <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
        </Console>
        <!-- 添加其他 appender，如 RollingFile 等 -->
    </Appenders>
    <Loggers>
        <Root level="info">
            <AppenderRef ref="Console"/>
            <!-- 添加其他 appender 的引用 -->
        </Root>
    </Loggers>
</Configuration>

索引库相关操作

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.GetIndexResponse;

import java.io.IOException;

/**
 * Demo entry point for index management: creates the index, prints its
 * aliases, mappings and settings, then deletes it again.
 */
public class ESTest {

    /** Name of the index this demo creates, inspects and deletes. */
    private static final String INDEX_NAME = "text_file_doc";

    /** Client shared by the demo; closed at the end of {@link #main(String[])}. */
    public static RestHighLevelClient client =
            new RestHighLevelClient(RestClient.builder(new HttpHost("192.168.119.88", 9200, "http")));

    public static void main(String[] args) {
        ESUtils esUtils = new ESUtils(client);

        try {
            // Create the index and print whether the request was acknowledged
            System.out.println(esUtils.createIndex(INDEX_NAME));

            // Fetch the index information and print each part of it
            GetIndexResponse indexInfo = esUtils.getIndex(INDEX_NAME);
            System.out.println(indexInfo.getAliases());
            System.out.println(indexInfo.getMappings());
            System.out.println(indexInfo.getSettings());

            // Delete the index and print whether the request was acknowledged
            System.out.println(esUtils.deleteIndex(INDEX_NAME));
        } catch (IOException requestFailure) {
            requestFailure.printStackTrace();
        } finally {
            // Always release the client, whether or not the requests succeeded
            try {
                client.close();
            } catch (IOException closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }

}

工具类

本文所使用到的ES工具类

import com.fasterxml.jackson.databind.ObjectMapper;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.client.indices.GetIndexResponse;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilder;

import java.io.IOException;
import java.util.List;


/**
 * Convenience wrapper around {@link RestHighLevelClient} for index management
 * and document indexing. Searching is exposed through {@link #searchUtils}.
 */
public class ESUtils {

    /** Search helper that uses the same client as this instance. */
    public SearchUtils searchUtils;

    /** JSON mapper used to serialize documents before they are sent to the index. */
    public static ObjectMapper objectMapper = new ObjectMapper();

    private final RestHighLevelClient client;

    /**
     * @param client client used for every request; this class never closes it
     */
    public ESUtils(RestHighLevelClient client){
        this.client = client;
        this.searchUtils = new SearchUtils(client);
    }

    /**
     * Search operations. Declared static because it only needs the client it is
     * given and never touches the enclosing {@link ESUtils} instance.
     */
    public static class SearchUtils {

        private final RestHighLevelClient client;

        public SearchUtils(RestHighLevelClient client) {
            this.client = client;
        }

        /**
         * Runs a query against one index.
         * @param indexName index to search
         * @param queryBuilder query to execute
         * @return the raw search response
         * @throws IOException if the request fails
         */
        public SearchResponse search(String indexName, QueryBuilder queryBuilder) throws IOException {
            SearchRequest request = new SearchRequest(indexName);
            request.source().query(queryBuilder);
            return client.search(request, RequestOptions.DEFAULT);
        }

    }

    /**
     * Creates an index without passing any settings or mappings in the request.
     * @param indexName index name
     * @return whether the creation was acknowledged
     * @throws IOException if the request fails
     */
    public boolean createIndex(String indexName) throws IOException {
        CreateIndexRequest request = new CreateIndexRequest(indexName);
        CreateIndexResponse response = client.indices().create(request, RequestOptions.DEFAULT);
        return response.isAcknowledged();
    }

    /**
     * Fetches the information of an index.
     * @param indexName index name
     * @return the raw get-index response
     * @throws IOException if the request fails
     */
    public GetIndexResponse getIndex(String indexName) throws IOException {
        GetIndexRequest getIndexRequest = new GetIndexRequest(indexName);
        return client.indices().get(getIndexRequest, RequestOptions.DEFAULT);
    }

    /**
     * Deletes an index.
     * @param indexName index name
     * @return whether the deletion was acknowledged
     * @throws IOException if the request fails
     */
    public boolean deleteIndex(String indexName) throws IOException {
        DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest(indexName);
        AcknowledgedResponse response = client.indices().delete(deleteIndexRequest, RequestOptions.DEFAULT);
        return response.isAcknowledged();
    }

    /**
     * Adds a document to an index under an explicit id.
     * @param indexName index name
     * @param docId id to store the document under
     * @param doc document object, serialized to JSON with {@link #objectMapper}
     * @return the raw index response
     * @throws IOException if serialization or the request fails
     */
    public IndexResponse addDoc(String indexName,String docId,Object doc) throws IOException {
        IndexRequest request = buildIndexRequest(indexName, doc).id(docId);
        return client.index(request, RequestOptions.DEFAULT);
    }

    /**
     * Adds a document to an index without setting a document id on the request.
     * @param indexName index name
     * @param doc document object, serialized to JSON with {@link #objectMapper}
     * @return the raw index response
     * @throws IOException if serialization or the request fails
     */
    public IndexResponse addDoc(String indexName,Object doc) throws IOException {
        return client.index(buildIndexRequest(indexName, doc), RequestOptions.DEFAULT);
    }

    /**
     * Adds several documents in one bulk request; no document ids are set.
     * NOTE(review): an empty {@code docs} list produces a bulk request with no
     * actions, which the client is expected to reject - confirm and guard in callers.
     * @param indexName index name
     * @param docs documents to add, each serialized to JSON with {@link #objectMapper}
     * @return the raw bulk response
     * @throws IOException if serialization or the request fails
     */
    public BulkResponse bulkAddDoc(String indexName, List<?> docs) throws IOException {
        BulkRequest bulkRequest = new BulkRequest();

        for (Object doc : docs) {
            bulkRequest.add(buildIndexRequest(indexName, doc));
        }

        return client.bulk(bulkRequest, RequestOptions.DEFAULT);
    }

    /** Builds an index request for {@code indexName} whose source is {@code doc} serialized as JSON. */
    private static IndexRequest buildIndexRequest(String indexName, Object doc) throws IOException {
        IndexRequest request = new IndexRequest();
        request.index(indexName);
        request.source(objectMapper.writeValueAsString(doc), XContentType.JSON);
        return request;
    }

}

上面工具类中创建索引的方法没有提前指定mappings，因此没有字段约束：写入文档时ES会通过动态映射自动推断字段类型，此时使用的是默认分词器，而不是前面配置的ik_smart

如果在创建时要指定mappings和settings等约束，使用

CreateIndexRequest request = new CreateIndexRequest(indexName);
request.source(source,XContentType.JSON);

查询测试

GET /text_file_doc/_search
{
    "query":{
        "match":{
            "context":"docker"
        }
    }
}

{
  "took" : 4424,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 0.57595575,
    "hits" : [
      {
        "_index" : "text_file_doc",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 0.57595575,
        "_source" : {
          "fileName" : "ElasticSearch入门",
          "context" : """# 分布式搜索ES

## Elastic Search简介

**什么是elasticsearch？**

elasticsearch是一款非常强大的开源搜索引擎，可以帮助我们从海量数据中快速找到需要的内容

elasticsearch结合kibana、Logstash、Beats，也就是**elastic stack（ELK）**，被广泛应用在日志数据分析、实时监控等领域

查询结果如上

清空索引库方法

POST /text_file_doc/_delete_by_query
{
  "query": {
    "match_all": {}
  }
}

批量插入文档

下面是一个简单的需求，将电脑指定目录下的所有markdown文件存入ES并创建倒排索引，然后通过关键词进行文档搜索。

创建文本文档java类

封装对应的文档类

/**
 * Plain bean describing one text file: its name and its full text.
 * Keeps the implicit no-argument constructor and standard accessors.
 */
public class TextFileDoc {

    /** Name of the file the text came from. */
    private String fileName;

    /** Text content of the file. */
    private String context;

    /** @return the file name, or {@code null} if none has been set */
    public String getFileName() {
        return this.fileName;
    }

    /** @param fileName the file name to store */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /** @return the text content, or {@code null} if none has been set */
    public String getContext() {
        return this.context;
    }

    /** @param context the text content to store */
    public void setContext(String context) {
        this.context = context;
    }
}

文档读取工具

文档读取工具，到指定目录下读取对应类型的文档信息

import com.os467.entity.TextFileDoc;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Recursively collects text files with a given extension from a directory tree
 * and loads each one into a {@link TextFileDoc}.
 */
public class ESLoadLongTextDocument {

    /** Size of the char buffer used while reading a file. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Reads every file with the given extension under a path.
     * @param dirPath path to a directory (searched recursively) or to a single file
     * @param extraName file extension without the dot, e.g. docx, md, txt
     * @return one document per matching file; empty if the path does not exist
     * @throws IOException if a matching file cannot be read
     */
    public List<TextFileDoc> loadAllDocumentFile(String dirPath,String extraName) throws IOException {
        List<TextFileDoc> textFileDocList = new ArrayList<>();
        File file = new File(dirPath);
        if (file.exists()){
            readFile(file,extraName,textFileDocList);
        }
        return textFileDocList;
    }

    /** Walks {@code file} recursively and appends a document for every matching regular file. */
    private void readFile(File file, String extraName,List<TextFileDoc> textFileDocs) throws IOException {
        if (file.isDirectory()){
            File[] children = file.listFiles();
            // listFiles() returns null when the directory cannot be listed; skip it instead of failing
            if (children == null){
                return;
            }
            for (File child : children) {
                readFile(child,extraName,textFileDocs);
            }
            return;
        }

        if (!file.getName().endsWith("."+extraName)){
            return;
        }

        TextFileDoc textFileDoc = new TextFileDoc();
        textFileDoc.setFileName(file.getName());
        textFileDoc.setContext(readContent(file));
        textFileDocs.add(textFileDoc);
    }

    /**
     * Reads the whole file as UTF-8 text. The reader is closed even when reading
     * fails, and a fresh buffer is used per file so a failed read cannot leak
     * partial content into the next document.
     */
    private static String readContent(File file) throws IOException {
        StringBuilder content = new StringBuilder();
        char[] buffer = new char[BUFFER_SIZE];
        try (Reader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            int len;
            while ((len = reader.read(buffer)) != -1){
                content.append(buffer,0,len);
            }
        }
        return content.toString();
    }

}

批量插入

import com.os467.entity.TextFileDoc;
import org.apache.http.HttpHost;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;

import java.io.IOException;
import java.util.List;

/**
 * Demo entry point for bulk indexing: loads every markdown file under a local
 * directory and adds them to the index in a single bulk request.
 */
public class ESTest {

    /** Client shared by the demo; closed at the end of {@link #main(String[])}. */
    public static RestHighLevelClient client;

    static {
        client = new RestHighLevelClient(RestClient.builder(new HttpHost("192.168.119.88", 9200, "http")));
    }

    public static void main(String[] args) {


        ESUtils esUtils = new ESUtils(client);

        try {

            ESLoadLongTextDocument esLoadLongTextDocument = new ESLoadLongTextDocument();

            List<TextFileDoc> fileDocs = esLoadLongTextDocument.loadAllDocumentFile("D:\\md笔记\\java后端", "md");

            if (fileDocs.isEmpty()) {
                // A bulk request with no actions fails request validation with an
                // unchecked exception, so skip the call when nothing was loaded.
                System.out.println("No matching documents found, nothing to index");
            } else {
                BulkResponse response = esUtils.bulkAddDoc("text_file_doc", fileDocs);
                System.out.println(response.status());

                // The status above describes the bulk call as a whole; individual
                // documents can still fail, so report those explicitly.
                if (response.hasFailures()) {
                    System.err.println(response.buildFailureMessage());
                }
            }

        } catch (IOException ioException) {
            ioException.printStackTrace();
        } finally {
            // Always release the client, whether or not indexing succeeded
            try {
                client.close();
            } catch (IOException ioException) {
                ioException.printStackTrace();
            }
        }

    }

}

查询结果

import com.os467.entity.TextFileDoc;
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;

import java.io.IOException;

/**
 * Demo entry point for searching: runs a match query on the {@code context}
 * field and prints the file name of every returned hit.
 */
public class ESTest {

    /** Client shared by the demo; closed at the end of {@link #main(String[])}. */
    public static RestHighLevelClient client =
            new RestHighLevelClient(RestClient.builder(new HttpHost("192.168.119.88", 9200, "http")));

    public static void main(String[] args) {
        ESUtils esUtils = new ESUtils(client);

        try {
            SearchResponse result = esUtils.searchUtils.search(
                    "text_file_doc", QueryBuilders.matchQuery("context", "mybatis配置"));

            SearchHit[] matches = result.getHits().getHits();
            for (int i = 0; i < matches.length; i++) {
                // Map the hit's JSON source back onto the document bean
                TextFileDoc matchedDoc =
                        ESUtils.objectMapper.readValue(matches[i].getSourceAsString(), TextFileDoc.class);
                System.out.println(matchedDoc.getFileName());
            }
        } catch (IOException requestFailure) {
            requestFailure.printStackTrace();
        } finally {
            // Always release the client, whether or not the search succeeded
            try {
                client.close();
            } catch (IOException closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }

}

搜索结果按相关度得分从高到低排列

ssm项目整合.md
springCloud项目.md
MyBatis.md
springboot.md
MyBatisPlus.md
微服务.md
mini_cloud.md
redis.md
SpringSecurity.md
异常日志.md

转载请注明来源，欢迎对文章中的引用来源进行考证，欢迎指出任何有错误或不够清晰的表达。可以邮件至 1300452403@qq.com