ElasticSearch集成SpringBoot实现模仿京东搜索引擎
使用的版本:
SpringBoot:2.3.4.RELEASE
ElasticSearch:7.17.3(需与本机安装的ElasticSearch版本保持一致)
1.创建空的Maven项目,并指定所使用的SpringBoot和ElasticSearch的版本
<!-- Parent POM: Spring Boot starter parent, version 2.3.4.RELEASE -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.3.4.RELEASE</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<!-- Maven coordinates of this project -->
<groupId>org.example</groupId>
<artifactId>springboot-elasticsearch</artifactId>
<version>1.0-SNAPSHOT</version>
<!--pom.xml-->
<properties>
<!-- Java language level used for the build -->
<java.version>1.8</java.version>
<!-- Elasticsearch version, chosen to match the locally installed server;
     presumably overrides the version managed by the parent POM (confirm) -->
<elasticsearch.version>7.17.3</elasticsearch.version>
</properties>
SpringBoot的版本在parent中指定,ElasticSearch的版本通过properties中的elasticsearch.version指定
2.引入依赖
<dependencies>
<!-- Elasticsearch core library, pinned to 7.17.3 -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>7.17.3</version>
</dependency>
<!-- Spring Data Elasticsearch starter -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
<version>2.3.4.RELEASE</version>
</dependency>
<!-- Elasticsearch Java API client; its transitive elasticsearch-rest-client is excluded -->
<dependency>
<groupId>co.elastic.clients</groupId>
<artifactId>elasticsearch-java</artifactId>
<version>7.17.3</version>
<exclusions>
<exclusion>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-client</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Transport client; its transitive elasticsearch and elasticsearch-core artifacts are excluded -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>7.17.3</version>
<exclusions>
<exclusion>
<artifactId>elasticsearch</artifactId>
<groupId>org.elasticsearch</groupId>
</exclusion>
<exclusion>
<artifactId>elasticsearch-core</artifactId>
<groupId>org.elasticsearch</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- Thymeleaf template starter (presumably renders the "index" view returned by IndexController) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<!-- Spring MVC web starter, used by the controllers -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Developer tooling: runtime scope, optional -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<scope>runtime</scope>
<optional>true</optional>
</dependency>
<!-- Configuration metadata annotation processor, optional -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<!-- Lombok: supplies the @Data/@NoArgsConstructor/@AllArgsConstructor annotations used on HtmlContent -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- Test starter, test scope only -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- fastjson: ContentService serializes HtmlContent objects to JSON with it -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.73</version>
</dependency>
<!-- jsoup: HtmlParseUtils fetches and parses the JD search page with it -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
</dependencies>
3.编写ElasticSearch配置类,注册RestHighLevelClient Bean
package com.qidi.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * HEU
 * Harbin Engineering University
 * <p>
 * Spring configuration that registers the Elasticsearch high-level REST client as a bean.
 *
 * @author QiDi
 * @date 2022/12/15 15:16
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Creates the client used to talk to Elasticsearch.
     *
     * @return a {@link RestHighLevelClient} connected to {@code http://localhost:9200}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost localNode = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(localNode));
    }
}
4.配置实体类
package com.qidi.entity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * HEU
 * Harbin Engineering University
 * <p>
 * Data holder for one product scraped from a JD search result page.
 * Lombok generates the accessors and the no-args / all-args constructors;
 * the all-args constructor takes its arguments in field declaration order
 * (title, price, imgUrl), so the field order must not be changed.
 *
 * @author QiDi
 * @date 2022/12/15 15:26
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class HtmlContent {
// Product title text (HtmlParseUtils fills it from the "p-name" element)
private String title;
// Price text as shown on the page (filled from the "p-price" element), kept as a String
private String price;
// Product image URL (filled from the img tag's "data-lazy-img" attribute)
private String imgUrl;
}
5.配置工具类
爬取JD页面数据工具类
package com.qidi.utils;
import com.qidi.entity.HtmlContent;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
/**
 * HEU
 * Harbin Engineering University
 * <p>
 * Scrapes product information from the JD search result page.
 *
 * @author QiDi
 * @date 2022/12/15 15:28
 */
public class HtmlParseUtils {

    /**
     * Fetches the JD search result page for {@code keyword} and extracts the
     * title, price and image URL of every listed product.
     *
     * @param keyword search keyword; must not be {@code null}. It is URL-encoded
     *                before being placed in the query string.
     * @return the parsed products, in page order; an empty list when the page
     *         does not contain the {@code J_goodsList} container
     * @throws IOException if the page cannot be fetched or parsed
     */
    public List<HtmlContent> parseJD(String keyword) throws IOException {
        // Encode the keyword so characters such as '&', '#', '+' or spaces cannot corrupt the query string
        String url = "https://search.jd.com/Search?keyword=" + URLEncoder.encode(keyword, "UTF-8");
        // Download and parse the page (30 s timeout)
        Document document = Jsoup.parse(new URL(url), 30000);

        List<HtmlContent> contents = new ArrayList<>();

        // All products are expected inside the div with id "J_goodsList"
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The returned page has no product container, so there is nothing to parse
            return contents;
        }

        // Each product is one <li> under that div
        Elements items = goodsList.getElementsByTag("li");
        for (Element item : items) {
            String title = item.getElementsByClass("p-name").eq(0).text();
            // The image URL is read from the "data-lazy-img" attribute rather than "src"
            String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = item.getElementsByClass("p-price").eq(0).text();
            contents.add(new HtmlContent(title, price, img));
        }
        return contents;
    }
}
索引常量工具类
package com.qidi.utils;
import java.util.Stack;
/**
 * HEU
 * Harbin Engineering University
 * <p>
 * Holder for constants shared across the application. Not instantiable.
 *
 * @author QiDi
 * @date 2022/12/15 18:53
 */
public final class CommonConstant {

    /** Name of the Elasticsearch index that stores the scraped JD products. */
    public static final String INDEX = "test_jd_goods";

    // Constants holder: prevent instantiation
    private CommonConstant() {
    }
}
6.编写Service层
package com.qidi.service;
import com.alibaba.fastjson.JSON;
import com.qidi.entity.HtmlContent;
import com.qidi.utils.CommonConstant;
import com.qidi.utils.HtmlParseUtils;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.AbstractHighlighterBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.xcontent.XContentType;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import javax.naming.directory.SearchResult;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
/**
 * HEU
 * Harbin Engineering University
 * <p>
 * Loads scraped JD products into the {@code CommonConstant.INDEX} Elasticsearch
 * index and runs paged searches with title highlighting against it.
 *
 * @author QiDi
 * @date 2022/12/15 18:42
 */
@Service
public class ContentService {
    @Autowired
    RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the JD products for {@code keyword} and bulk-indexes them into Elasticsearch.
     *
     * @param keyword search keyword passed to {@link HtmlParseUtils#parseJD(String)}
     * @return {@code true} if every document was indexed without failure;
     *         {@code false} if any bulk item failed or nothing was scraped
     * @throws IOException if scraping the page or calling Elasticsearch fails
     */
    public Boolean addContent(String keyword) throws IOException {
        List<HtmlContent> contents = new HtmlParseUtils().parseJD(keyword);

        // An empty bulk request is rejected by request validation ("no requests added"),
        // so report "nothing indexed" instead of failing with an exception.
        if (contents == null || contents.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m"); // bulk timeout: 2 minutes
        for (HtmlContent content : contents) {
            // One index action per product, serialized as JSON
            bulkRequest.add(
                    new IndexRequest(CommonConstant.INDEX)
                            .source(JSON.toJSONString(content), XContentType.JSON));
        }

        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Searches products by title and returns one page of hits, with the matched
     * part of the title wrapped in a red {@code <span>}.
     *
     * @param keyword  term to look up in the {@code title} field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source of each hit as a map; {@code title} is replaced by its
     *         highlighted version when a highlight is available
     * @throws IOException         if the Elasticsearch call fails
     * @throws ArithmeticException if {@code (pageNo - 1) * pageSize} overflows an {@code int}
     */
    public List<Map<String, Object>> searchPageForHighLight(String keyword, int pageNo, int pageSize) throws IOException {
        int page = Math.max(pageNo, 1);

        SearchRequest searchRequest = new SearchRequest(CommonConstant.INDEX);
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

        // Paging: "from" is a zero-based document offset, not a page number
        searchSourceBuilder.from(Math.multiplyExact(page - 1, pageSize));
        searchSourceBuilder.size(pageSize);

        // Exact term lookup on title.
        // NOTE(review): a term query is not analyzed; if title is an analyzed text field,
        // multi-character keywords may not match and a match query may be intended. Confirm.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        searchSourceBuilder.query(termQueryBuilder);
        searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Highlighting of the title field
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");
        highlightBuilder.requireFieldMatch(false) // highlight the field even when the query did not match on it
                .preTags("<span style='color:red'>").postTags("</span>");
        searchSourceBuilder.highlighter(highlightBuilder);

        searchRequest.source(searchSourceBuilder);
        SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String, Object>> results = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap(); // original document
            HighlightField title = hit.getHighlightFields().get("title");
            if (title != null) {
                // Replace the plain title with the highlighted one
                sourceAsMap.put("title", joinFragments(title));
            }
            results.add(sourceAsMap);
        }
        return results;
    }

    /** Concatenates all highlight fragments of a field into a single string. */
    private static String joinFragments(HighlightField field) {
        StringBuilder joined = new StringBuilder();
        for (Text fragment : field.fragments()) {
            joined.append(fragment);
        }
        return joined.toString();
    }
}
7.编写Controller层
package com.qidi.controller;
import com.qidi.service.ContentService;
import com.sun.org.apache.xpath.internal.operations.Bool;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * HEU
 * Harbin Engineering University
 * <p>
 * REST endpoints for loading products into Elasticsearch and searching them.
 *
 * @author QiDi
 * @date 2022/12/15 19:07
 */
@RestController
public class ContentController {
    @Autowired
    ContentService contentService;

    /**
     * Scrapes the products for the given keyword and indexes them.
     *
     * @param keyword keyword taken from the request path
     * @return the result of {@code ContentService.addContent}
     * @throws IOException propagated from the service
     */
    @GetMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        Boolean indexed = contentService.addContent(keyword);
        return indexed;
    }

    /**
     * Runs a paged, highlighted search.
     *
     * @param keywords search keyword taken from the request path
     * @param pageNo   page number taken from the request path
     * @param pageSize page size taken from the request path
     * @return the hits returned by {@code ContentService.searchPageForHighLight}
     * @throws IOException propagated from the service
     */
    @GetMapping("/search/{keywords}/{pageNo}/{pageSize}")
    public List<Map<String, Object>> search(
            @PathVariable("keywords") String keywords,
            @PathVariable("pageNo") int pageNo,
            @PathVariable("pageSize") int pageSize) throws IOException {
        List<Map<String, Object>> hits =
                contentService.searchPageForHighLight(keywords, pageNo, pageSize);
        return hits;
    }
}
package com.qidi.controller;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
/**
 * HEU
 * Harbin Engineering University
 * <p>
 * Serves the application's landing page.
 *
 * @author QiDi
 * @date 2022/12/15 19:04
 */
@Controller
public class IndexController {

    /**
     * Handles GET requests for {@code /} and {@code /index}.
     *
     * @return the view name {@code "index"}
     */
    @GetMapping({"/", "/index"})
    public String index() {
        final String viewName = "index";
        return viewName;
    }
}
gitee地址:https://gitee.com/zidiqqq6/springboot-elasticsearch.git