环境搭建
创建一个SpringBoot项目。
配置

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
| <properties> <java.version>1.8</java.version> <elasticsearch.version>7.6.2</elasticsearch.version> </properties>
<dependencies> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.10.2</version> </dependency> <dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.62</version> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-data-elasticsearch</artifactId> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-thymeleaf</artifactId> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-web</artifactId> </dependency>
<dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-devtools</artifactId> <scope>runtime</scope> <optional>true</optional> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-configuration-processor</artifactId> <optional>true</optional> </dependency> <dependency> <groupId>org.projectlombok</groupId> <artifactId>lombok</artifactId> <optional>true</optional> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-test</artifactId> <scope>test</scope> <exclusions> <exclusion> <groupId>org.junit.vintage</groupId> <artifactId>junit-vintage-engine</artifactId> </exclusion> </exclusions> </dependency> </dependencies>
|

1 2
| spring.thymeleaf.cache=false
|

导入页面资料。这个在Elasticsearch概述中留有百度云链接
controller

1 2 3 4 5 6 7 8
| @Controller public class IndexController {
@RequestMapping({"/","/index"}) public String index(){ return "index"; } }
|
启动项目查看效果~ http://localhost:8080/

Jsoup解析
1 2 3 4 5 6
| <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.10.2</version> </dependency>
|



创建utils包,创建HtmlParseUtil类
通过对其网站的分析,就应该能看懂下面的代码了

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
| @Component public class HtmlParseUtil {
public static void main(String[] args) throws IOException { String url = "https://search.jd.com/Search?keyword=java"; Document document = Jsoup.parse(new URL(url), 30000); Element element = document.getElementById("J_goodsList"); System.out.println(element.html()); Elements elements = element.getElementsByTag("li"); for (Element e1 : elements) { String img = e1.getElementsByTag("img").eq(0).attr("data-lazy-img"); String price = e1.getElementsByClass("p-price").eq(0).text(); String title = e1.getElementsByClass("p-name").eq(0).text(); System.out.println("========================================"); System.out.println(img); System.out.println(price); System.out.println(title); } } }
|

成功获取到相应信息。接着对其进行封装。首先创建一个pojo Content对象(包含 img、price、title 三个 String 字段及对应的 getter/setter,可以用 Lombok 的 @Data 生成)。

然后封装成一个parseJD方法。


1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
| @Component public class HtmlParseUtil {
public static void main(String[] args) throws IOException { new HtmlParseUtil().parseJD("Vue").forEach(System.out::println); }
public ArrayList<Content> parseJD(String keywords) throws IOException { String url = "https://search.jd.com/Search?keyword=" + keywords; System.out.println(url); Document document = Jsoup.parse(new URL(url), 30000); Element element = document.getElementById("J_goodsList"); Elements elements = element.getElementsByTag("li");
ArrayList<Content> goodsList = new ArrayList<>(); for (Element e1 : elements) { String img = e1.getElementsByTag("img").eq(0).attr("data-lazy-img"); String price = e1.getElementsByClass("p-price").eq(0).text(); String title = e1.getElementsByClass("p-name").eq(0).text();
Content content = new Content(); content.setImg(img); content.setTitle(title); content.setPrice(price); goodsList.add(content); } return goodsList; } }
|
业务编写
首先照样配置Elasticsearch的配置类。

1 2 3 4 5 6 7 8 9 10 11
| @Configuration public class ElasticSearchClientConfig {
@Bean public RestHighLevelClient restHighLevelClient() { RestHighLevelClient client = new RestHighLevelClient( RestClient.builder( new HttpHost("localhost", 9200, "http"))); return client; } }
|
编写service业务类

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
| @Service public class ContentService {
@Autowired RestHighLevelClient restHighLevelClient;
public Boolean parseContent(String keywords) throws IOException { ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);
BulkRequest bulkRequest = new BulkRequest(); bulkRequest.timeout("2m");
for (int i = 0 ;i < contents.size();i++){ System.out.println(JSON.toJSONString(contents.get(i))); bulkRequest.add(new IndexRequest("jd_goods").source(JSON.toJSONString(contents.get(i)), XContentType.JSON)); } BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT); return !bulk.hasFailures(); } }
|
编写controller

1 2 3 4 5 6 7 8 9 10 11 12
| @RestController public class ContentController {
@Autowired ContentService contentService;
@GetMapping("/parse/{keyword}") public Boolean parse(@PathVariable("keyword") String keyword) throws IOException { Boolean result = contentService.parseContent(keyword); return result; } }
|
启动项目进行测试http://localhost:8080/parse/java


成功添加相关java商品资料。
接着我们继续编写service,添加分页搜索ES中的数据。

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
| @Service public class ContentService {
@Autowired RestHighLevelClient restHighLevelClient;
public Boolean parseContent(String keywords) throws IOException { ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);
BulkRequest bulkRequest = new BulkRequest(); bulkRequest.timeout("2m");
for (int i = 0 ;i < contents.size();i++){ System.out.println(JSON.toJSONString(contents.get(i))); bulkRequest.add(new IndexRequest("jd_goods").source(JSON.toJSONString(contents.get(i)), XContentType.JSON)); } BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT); return !bulk.hasFailures(); }
public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException { if (pageNo<=1){ pageNo = 1; }
SearchRequest searchRequest = new SearchRequest("jd_goods"); SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
sourceBuilder.from(pageNo); sourceBuilder.size(pageSize);
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword); sourceBuilder.query(termQueryBuilder); sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
searchRequest.source(sourceBuilder); SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
ArrayList<Map<String,Object>> list = new ArrayList<>();
SearchHit[] hits = searchResponse.getHits().getHits(); for (SearchHit documentFields : hits){ list.add(documentFields.getSourceAsMap()); } return list; } }
|
接着在controller中添加一个请求

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
| @RestController public class ContentController {
@Autowired ContentService contentService;
@GetMapping("/parse/{keyword}") public Boolean parse(@PathVariable("keyword") String keyword) throws IOException { Boolean result = contentService.parseContent(keyword); return result; }
@GetMapping("/search/{keyword}/{pageNo}/{pageSize}") public List<Map<String,Object>> search(@PathVariable("keyword") String keyword, @PathVariable("pageNo") int pageNo, @PathVariable("pageSize") int pageSize) throws IOException {
List<Map<String, Object>> list = contentService.searchPage(keyword, pageNo, pageSize); return list; } }
|
启动项目进行测试http://localhost:8080/search/java/1/20

前端页面
导入vue 和 axios,我这里使用的是在线版的
<script src="https://cdn.staticfile.org/vue/2.6.2/vue.min.js"></script>
<script src="https://unpkg.com/axios/dist/axios.min.js"></script>
修改我们的index页面。

启动项目查看效果。(我已经解析过了vue数据添加到了es中)

高亮功能
我们在业务类service中修改一下代码。

将高亮中的字段替换添加到_source中的title

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
| @Service public class ContentService {
@Autowired RestHighLevelClient restHighLevelClient;
public Boolean parseContent(String keywords) throws IOException { ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);
BulkRequest bulkRequest = new BulkRequest(); bulkRequest.timeout("2m");
for (int i = 0 ;i < contents.size();i++){ System.out.println(JSON.toJSONString(contents.get(i))); bulkRequest.add(new IndexRequest("jd_goods").source(JSON.toJSONString(contents.get(i)), XContentType.JSON)); } BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT); return !bulk.hasFailures(); }
public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException { if (pageNo<=1){ pageNo = 1; }
SearchRequest searchRequest = new SearchRequest("jd_goods"); SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
sourceBuilder.from(pageNo); sourceBuilder.size(pageSize);
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword); sourceBuilder.query(termQueryBuilder); sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
HighlightBuilder highlightBuilder = new HighlightBuilder(); highlightBuilder.field("title"); highlightBuilder.requireFieldMatch(false); highlightBuilder.preTags("<span style='color:red'>"); highlightBuilder.postTags("</span>"); sourceBuilder.highlighter(highlightBuilder);
searchRequest.source(sourceBuilder); SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
ArrayList<Map<String,Object>> list = new ArrayList<>();
SearchHit[] hits = searchResponse.getHits().getHits(); for (SearchHit hit : hits){ Map<String, HighlightField> highlightFields = hit.getHighlightFields(); Map<String, Object> sourceAsMap = hit.getSourceAsMap(); HighlightField title = highlightFields.get("title"); if (title!=null){ Text[] fragments = title.fragments(); String hTitle = ""; for (Text text : fragments) { hTitle += text; } sourceAsMap.put("title",hTitle); } list.add(sourceAsMap); } return list; } }
|

1 2 3
| <p class="productTitle"> <a v-html="result.title"> </a> </p>
|
重启服务,访问测试。http://localhost:8080/

完成!