ES文档搜索
ElasticSearch与Kibana安装以及ik分词器安装请看之前的文章,这里就不做过多赘述。
ES语言命名规范
索引库名称只能使用小写字母,且不能以下划线开头
索引库名称,包括属性名称都用小写字母加下划线的方式命名
创建索引库
需要提前安装ik分词器
创建索引库,包含两个属性:文件名称和文本内容,均使用ik_smart分词器进行分词
PUT /text_file_doc
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1
},
"mappings": {
"properties": {
"fileName":{
"type": "text",
"analyzer": "ik_smart"
},
"context":{
"type": "text",
"analyzer": "ik_smart"
}
}
}
}
使用Java ES客户端
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.6.1</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.9.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.10.0</version>
</dependency>
以上依赖依次是:ES的Java客户端、用于封装JSON字符串的Jackson依赖,以及log4j2日志依赖
需要将日志配置文件(log4j2.xml)添加到resources目录下
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN">
<Appenders>
<Console name="Console" target="SYSTEM_OUT">
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
</Console>
<!-- 添加其他 appender,如 RollingFile 等 -->
</Appenders>
<Loggers>
<Root level="info">
<AppenderRef ref="Console"/>
<!-- 添加其他 appender 的引用 -->
</Root>
</Loggers>
</Configuration>
索引库相关操作
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.GetIndexResponse;
import java.io.IOException;
/**
 * Demo entry point that walks through the index lifecycle:
 * create an index, print its metadata, then delete it again.
 */
public class ESTest {
    /** Client shared by the demo; built once when the class is loaded. */
    public static RestHighLevelClient client;

    static {
        HttpHost esNode = new HttpHost("192.168.119.88", 9200, "http");
        client = new RestHighLevelClient(RestClient.builder(esNode));
    }

    /**
     * Runs the lifecycle demo and always closes the client afterwards.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ESUtils esUtils = new ESUtils(client);
        try {
            runIndexLifecycle(esUtils, "text_file_doc");
        } catch (IOException ioException) {
            ioException.printStackTrace();
        } finally {
            closeClient();
        }
    }

    /** Creates the index, prints its aliases/mappings/settings, then deletes it. */
    private static void runIndexLifecycle(ESUtils esUtils, String indexName) throws IOException {
        // Create the index
        boolean created = esUtils.createIndex(indexName);
        System.out.println(created);

        // Fetch and print the index information
        GetIndexResponse indexInfo = esUtils.getIndex(indexName);
        System.out.println(indexInfo.getAliases());
        System.out.println(indexInfo.getMappings());
        System.out.println(indexInfo.getSettings());

        // Delete the index
        boolean deleted = esUtils.deleteIndex(indexName);
        System.out.println(deleted);
    }

    /** Closes the shared client, printing (not propagating) any I/O failure. */
    private static void closeClient() {
        try {
            client.close();
        } catch (IOException ioException) {
            ioException.printStackTrace();
        }
    }
}
工具类
本文所使用到的ES工具类
import com.fasterxml.jackson.databind.ObjectMapper;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.client.indices.GetIndexResponse;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilder;
import java.io.IOException;
import java.util.List;
/**
 * Thin convenience wrapper around {@link RestHighLevelClient} covering index
 * management, document indexing (single and bulk) and searching.
 */
public class ESUtils {
    /** Search helper bound to the same client as this instance. */
    public SearchUtils searchUtils;

    /** Shared JSON mapper used to serialize documents; also used by callers to parse hits. */
    public static final ObjectMapper objectMapper = new ObjectMapper();

    private final RestHighLevelClient client;

    /**
     * @param client the Elasticsearch client every operation is executed with;
     *               the caller remains responsible for closing it
     */
    public ESUtils(RestHighLevelClient client) {
        this.client = client;
        this.searchUtils = new SearchUtils(client);
    }

    /**
     * Search operations. Declared static because it only needs the client it is
     * given and never touches the enclosing {@code ESUtils} instance.
     */
    public static class SearchUtils {
        private final RestHighLevelClient client;

        public SearchUtils(RestHighLevelClient client) {
            this.client = client;
        }

        /**
         * Runs a query against one index.
         *
         * @param indexName    index to search
         * @param queryBuilder query to execute
         * @return the raw search response
         * @throws IOException if the request fails
         */
        public SearchResponse search(String indexName, QueryBuilder queryBuilder) throws IOException {
            SearchRequest request = new SearchRequest(indexName);
            request.source().query(queryBuilder);
            return client.search(request, RequestOptions.DEFAULT);
        }
    }

    /**
     * Creates an index with default settings and no explicit mappings.
     *
     * @param indexName index name
     * @return whether the creation was acknowledged
     * @throws IOException if the request fails
     */
    public boolean createIndex(String indexName) throws IOException {
        CreateIndexRequest request = new CreateIndexRequest(indexName);
        CreateIndexResponse response = client.indices().create(request, RequestOptions.DEFAULT);
        return response.isAcknowledged();
    }

    /**
     * Creates an index from a JSON definition, so that settings and mappings
     * (e.g. analyzers) are constrained up front.
     *
     * @param indexName index name
     * @param source    JSON body of the create-index request (settings/mappings/aliases)
     * @return whether the creation was acknowledged
     * @throws IOException if the request fails
     */
    public boolean createIndex(String indexName, String source) throws IOException {
        CreateIndexRequest request = new CreateIndexRequest(indexName);
        request.source(source, XContentType.JSON);
        CreateIndexResponse response = client.indices().create(request, RequestOptions.DEFAULT);
        return response.isAcknowledged();
    }

    /**
     * Fetches index information.
     *
     * @param indexName index name
     * @return the get-index response
     * @throws IOException if the request fails
     */
    public GetIndexResponse getIndex(String indexName) throws IOException {
        GetIndexRequest getIndexRequest = new GetIndexRequest(indexName);
        return client.indices().get(getIndexRequest, RequestOptions.DEFAULT);
    }

    /**
     * Deletes an index.
     *
     * @param indexName index name
     * @return whether the deletion was acknowledged
     * @throws IOException if the request fails
     */
    public boolean deleteIndex(String indexName) throws IOException {
        DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest(indexName);
        AcknowledgedResponse response = client.indices().delete(deleteIndexRequest, RequestOptions.DEFAULT);
        return response.isAcknowledged();
    }

    /**
     * Indexes a document under an explicit id.
     *
     * @param indexName index name
     * @param docId     document id to use
     * @param doc       document object, serialized to JSON with {@link #objectMapper}
     * @return the index response
     * @throws IOException if serialization or the request fails
     */
    public IndexResponse addDoc(String indexName, String docId, Object doc) throws IOException {
        IndexRequest request = buildIndexRequest(indexName, doc);
        request.id(docId);
        return client.index(request, RequestOptions.DEFAULT);
    }

    /**
     * Indexes a document without specifying an id.
     *
     * @param indexName index name
     * @param doc       document object, serialized to JSON with {@link #objectMapper}
     * @return the index response
     * @throws IOException if serialization or the request fails
     */
    public IndexResponse addDoc(String indexName, Object doc) throws IOException {
        return client.index(buildIndexRequest(indexName, doc), RequestOptions.DEFAULT);
    }

    /**
     * Indexes several documents with a single bulk request.
     *
     * <p>NOTE(review): an empty list produces an empty bulk request, which the
     * client is expected to reject during request validation - confirm and
     * guard on the caller side if empty input is possible.
     *
     * @param indexName index name
     * @param docs      documents to index
     * @return the bulk response; check it for per-item failures
     * @throws IOException if serialization or the request fails
     */
    public BulkResponse bulkAddDoc(String indexName, List<?> docs) throws IOException {
        BulkRequest bulkRequest = new BulkRequest();
        for (Object doc : docs) {
            bulkRequest.add(buildIndexRequest(indexName, doc));
        }
        return client.bulk(bulkRequest, RequestOptions.DEFAULT);
    }

    /** Builds an id-less index request whose source is the JSON form of {@code doc}. */
    private static IndexRequest buildIndexRequest(String indexName, Object doc) throws IOException {
        IndexRequest request = new IndexRequest();
        request.index(indexName);
        request.source(objectMapper.writeValueAsString(doc), XContentType.JSON);
        return request;
    }
}
创建索引的方法没有提前指定mappings,因此没有约束
如果在创建时要指定mappings和settings等约束,可以将对应的JSON字符串(source)设置到请求中:
CreateIndexRequest request = new CreateIndexRequest(indexName);
request.source(source,XContentType.JSON);
查询测试
GET /text_file_doc/_search
{
"query":{
"match":{
"context":"docker"
}
}
}
{
"took" : 4424,
"timed_out" : false,
"_shards" : {
"total" : 3,
"successful" : 3,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.57595575,
"hits" : [
{
"_index" : "text_file_doc",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.57595575,
"_source" : {
"fileName" : "ElasticSearch入门",
"context" : """# 分布式搜索ES
## Elastic Search简介
**什么是elasticsearch?**
elasticsearch是一款非常强大的开源搜索引擎,可以帮助我们从海量数据中快速找到需要的内容
elasticsearch结合kibana、Logstash、Beats,也就是**elastic stack(ELK)**,被广泛应用在日志数据分析、实时监控等领域
查询结果如上
清空索引库方法
POST /text_file_doc/_delete_by_query
{
"query": {
"match_all": {}
}
}
批量插入文档
下面是一个简单的需求,将电脑指定目录下的所有markdown文件存入ES并创建倒排索引,然后通过关键词进行文档搜索。
创建文本文档java类
封装对应的文档类
/**
 * Plain data holder for one text file: its name and its full text.
 * Both properties are {@code null} until set.
 */
public class TextFileDoc {

    /** Name of the file the text was read from. */
    private String fileName;

    /** Complete text content of the file. */
    private String context;

    /** @return the file name, or {@code null} if not set */
    public String getFileName() {
        return this.fileName;
    }

    /** @param fileName the file name to store */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /** @return the text content, or {@code null} if not set */
    public String getContext() {
        return this.context;
    }

    /** @param context the text content to store */
    public void setContext(String context) {
        this.context = context;
    }
}
文档读取工具
文档读取工具,到指定目录下读取对应类型的文档信息
import com.os467.entity.TextFileDoc;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
/**
 * Loads text documents of a given file extension from a directory tree and
 * turns each one into a {@link TextFileDoc}.
 */
public class ESLoadLongTextDocument {
    /** Number of chars read per chunk while draining a file. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Reads every file with the given extension under {@code dirPath}, recursing
     * into sub-directories.
     *
     * @param dirPath   directory (or single file) to start from
     * @param extraName file extension without the dot, e.g. {@code docx}, {@code md}, {@code txt}
     * @return one document per matching file; empty if the path does not exist
     * @throws IOException if a matching file cannot be read
     */
    public List<TextFileDoc> loadAllDocumentFile(String dirPath, String extraName) throws IOException {
        List<TextFileDoc> textFileDocList = new ArrayList<>();
        File file = new File(dirPath);
        if (file.exists()) {
            readFile(file, extraName, textFileDocList);
        }
        return textFileDocList;
    }

    /** Recursively collects matching files below {@code file} into {@code textFileDocs}. */
    private void readFile(File file, String extraName, List<TextFileDoc> textFileDocs) throws IOException {
        if (file.isDirectory()) {
            File[] children = file.listFiles();
            if (children == null) {
                // listFiles() returns null when the directory cannot be listed; skip it.
                return;
            }
            for (File child : children) {
                readFile(child, extraName, textFileDocs);
            }
            return;
        }
        if (!file.getName().endsWith("." + extraName)) {
            return;
        }

        TextFileDoc textFileDoc = new TextFileDoc();
        textFileDoc.setFileName(file.getName());
        textFileDoc.setContext(readContent(file));
        textFileDocs.add(textFileDoc);
    }

    /**
     * Reads the whole file as text. The reader is closed even when reading fails,
     * and the content is decoded as UTF-8 rather than the platform default so the
     * result does not depend on the machine the loader runs on.
     */
    private String readContent(File file) throws IOException {
        // A fresh builder per file: a failed read can never leak partial text into the next document.
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(file), java.nio.charset.StandardCharsets.UTF_8))) {
            char[] buffer = new char[BUFFER_SIZE];
            int len;
            while ((len = reader.read(buffer)) != -1) {
                content.append(buffer, 0, len);
            }
        }
        return content.toString();
    }
}
批量插入
import com.os467.entity.TextFileDoc;
import org.apache.http.HttpHost;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import java.io.IOException;
import java.util.List;
/**
 * Demo entry point that loads every markdown file under a local directory and
 * indexes them into {@code text_file_doc} with one bulk request.
 */
public class ESTest {
    /** Client shared by the demo; built once when the class is loaded. */
    public static RestHighLevelClient client;

    static {
        client = new RestHighLevelClient(RestClient.builder(new HttpHost("192.168.119.88", 9200, "http")));
    }

    /**
     * Loads the documents, bulk-indexes them and reports the outcome.
     * The client is closed in all cases.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ESUtils esUtils = new ESUtils(client);
        try {
            ESLoadLongTextDocument esLoadLongTextDocument = new ESLoadLongTextDocument();
            List<TextFileDoc> fileDocs = esLoadLongTextDocument.loadAllDocumentFile("D:\\md笔记\\java后端", "md");
            if (fileDocs.isEmpty()) {
                // Nothing to send; avoid submitting an empty bulk request.
                System.out.println("No matching documents found, nothing to index.");
                return;
            }
            BulkResponse response = esUtils.bulkAddDoc("text_file_doc", fileDocs);
            System.out.println(response.status());
            // The status above describes the bulk request as a whole; individual
            // documents can still have failed, so check the items explicitly.
            if (response.hasFailures()) {
                System.err.println(response.buildFailureMessage());
            }
        } catch (IOException ioException) {
            ioException.printStackTrace();
        } finally {
            try {
                client.close();
            } catch (IOException ioException) {
                ioException.printStackTrace();
            }
        }
    }
}
查询结果
import com.os467.entity.TextFileDoc;
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import java.io.IOException;
/**
 * Demo entry point that runs a match query on the {@code context} field of
 * {@code text_file_doc} and prints the file name of every hit.
 */
public class ESTest {
    /** Client shared by the demo; built once when the class is loaded. */
    public static RestHighLevelClient client;

    static {
        HttpHost esNode = new HttpHost("192.168.119.88", 9200, "http");
        client = new RestHighLevelClient(RestClient.builder(esNode));
    }

    /**
     * Executes the search and always closes the client afterwards.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ESUtils esUtils = new ESUtils(client);
        try {
            ESUtils.SearchUtils searcher = esUtils.searchUtils;
            SearchResponse result = searcher.search(
                    "text_file_doc", QueryBuilders.matchQuery("context", "mybatis配置"));
            printFileNames(result);
        } catch (IOException ioException) {
            ioException.printStackTrace();
        } finally {
            closeClient();
        }
    }

    /** Deserializes each hit's source into a {@code TextFileDoc} and prints its file name. */
    private static void printFileNames(SearchResponse result) throws IOException {
        SearchHit[] matches = result.getHits().getHits();
        for (int i = 0; i < matches.length; i++) {
            String json = matches[i].getSourceAsString();
            TextFileDoc doc = ESUtils.objectMapper.readValue(json, TextFileDoc.class);
            System.out.println(doc.getFileName());
        }
    }

    /** Closes the shared client, printing (not propagating) any I/O failure. */
    private static void closeClient() {
        try {
            client.close();
        } catch (IOException ioException) {
            ioException.printStackTrace();
        }
    }
}
按照关联度打分搜索结果
ssm项目整合.md
springCloud项目.md
MyBatis.md
springboot.md
MyBatisPlus.md
微服务.md
mini_cloud.md
redis.md
SpringSecurity.md
异常日志.md
转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。可以邮件至 1300452403@qq.com