1. 新建 Maven 项目,并在 pom.xml 中添加 Jsoup 和 Lombok 的依赖:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>Jsoup-demo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!-- The sample code uses the diamond operator and method references, so it needs Java 8. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <!-- The sources contain non-ASCII comments; pin the encoding instead of using the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- HTML fetching and parsing: https://mvnrepository.com/artifact/org.jsoup/jsoup -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.13.1</version>
        </dependency>
        <!-- Lombok is only needed at compile time (annotation processing), hence the provided scope. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.12</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>
</project>
2.新建实体类,代码如下:
1 package cn.lxcourse.jsoup.pojo;
2
3 import lombok.AllArgsConstructor;
4 import lombok.Data;
5 import lombok.NoArgsConstructor;
6
7 @Data
8 @NoArgsConstructor
9 @AllArgsConstructor
10 public class Content {
11 private String price;
12 private String title;
13 private String imgSrc;
14 }
3.编写工具类,代码如下:
1 package cn.lxcourse.jsoup.util;
2
3 import cn.lxcourse.jsoup.pojo.Content;
4 import org.jsoup.Jsoup;
5 import org.jsoup.nodes.Document;
6 import org.jsoup.nodes.Element;
7 import org.jsoup.select.Elements;
8
9 import java.net.URL;
10 import java.util.ArrayList;
11 import java.util.List;
12
13 /**
14 * 爬虫工具
15 */
16 public class JsoupUtils {
17
18 /**
19 * 爬取京东商品列表
20 * @param keywords
21 * @return
22 * @throws Exception
23 */
24 public static List<Content> getJDGoods(String keywords) throws Exception {
25 String url = "https://search.jd.com/Search?keyword=Java" + keywords;
26 Document document = Jsoup.parse(new URL(url), 300000);
27 //商品列表
28 Element j_goodsList = document.getElementById("J_goodsList");
29 Elements glEtemElements = j_goodsList.getElementsByClass("gl-item");
30
31 List<Content> list = new ArrayList<>();
32 for (Element element : glEtemElements) {
33
34 String imgSrc = element.getElementsByTag("img").eq(0).attr("source-data-lazy-img");
35 String price = element.getElementsByClass("p-price").eq(0).text();
36 String title = element.getElementsByClass("p-name").eq(0).text();
37
38 Content content = new Content();
39 content.setImgSrc(imgSrc);
40 content.setPrice(price);
41 content.setTitle(title);
42
43 list.add(content);
44 }
45
46 return list;
47 }
48
49 /**
50 * 爬取工行融e购商品列表
51 * @param keywords
52 * @return
53 * @throws Exception
54 */
55 public static List<Content> getRongYiGouGoods(String keywords) throws Exception {
56 //https://mall.icbc.com.cn/searchproducts/pv.jhtml?query=java
57 String url = "https://mall.icbc.com.cn/searchproducts/pv.jhtml?query=" + keywords;
58
59 Document document = Jsoup.parse(new URL(url), 30000);
60 Element ajaxQueryContent = document.getElementById("ajaxQueryContent");
61
62 Elements liElements = ajaxQueryContent.getElementsByTag("li");
63
64 List<Content> list = new ArrayList<>();
65
66 for (Element el : liElements) {
67 String src = el.getElementsByTag("img").eq(0).attr("src");
68 String price = el.getElementsByClass("p-price").eq(0).text();
69 String title = el.getElementsByClass("p-name").eq(0).select("a").eq(0).attr("title");
70 Content content = new Content();
71 content.setTitle(title);
72 content.setPrice(price);
73 content.setImgSrc(src);
74 list.add(content);
75 }
76
77 return list;
78 }
79
80 public static void main(String[] args) throws Exception {
81 //getJDGoods("Java").forEach(System.out::println);
82 getRongYiGouGoods("java").forEach(System.out::println);
83 }
84 }