一、导入jsoup的maven依赖
<!-- jsoup: provides the org.jsoup.* classes imported by the crawler code below -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
</dependency>
二、运行代码。也可以自定义要爬取的网站,这里就以 https://gank.io/special/Girl 为例。
package com.song;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class SpyderTest {
/**
* [url=home.php?mod=space&uid=686208]@AuThor[/url] Maoyan
* [url=home.php?mod=space&uid=264116]@data[/url] 2020/3/31 12:30
* [url=home.php?mod=space&uid=1248337]@version[/url] 1.0.0
*/
private static HttpURLConnection conn = null;
private static InputStream inputStream = null;
private static BufferedInputStream bufferedInputStream = null;
private static FileOutputStream fileOutputStream = null;
//利用预编译正则表达式提高效率
private static Pattern pattern = Pattern.compile("/images/\w+");
public static void main(String[] args) throws IOException {
for (int i = 1; i <= 5; i++) {
//创建url
URL url = new URL("https://gank.io/special/Girl/page/" + i);
//创建doc对象
Document document = Jsoup.parse(url, 20 * 1000);
//获取img标签
Elements imgs = document.getElementsByTag("title");
Elements elements = document.select("a[title][target][style]");
for (Element e :
elements) {
String image = "第" + i + "页:" + "https://gank.io" + e.attr("style");
//正则表达式
Matcher matcher = pattern.matcher(image);
while (matcher.find()) {
System.out.println("第" + i + "页:" + "https://gank.io" + matcher.group());
//这里可以写成工具类
File file = new File("D:\picture\littlesister");
if (!file.isDirectory() && !file.exists()) {
//如果文件夹不存在则创建一个文件夹
file.mkdirs();
}
//生成一个唯一的标记来当作图片名称
String filename = UUID.randomUUID().toString();
fileOutputStream = new FileOutputStream(file + "\" + filename + ".jpeg");
//建立连接
URL imageurl = new URL("https://gank.io" + matcher.group());
conn = (HttpURLConnection) imageurl.openConnection();
//使用GET方法
conn.setRequestMethod("GET");
//连接指定的资源
conn.connect();
//获取输入流
inputStream = conn.getInputStream();
//新建buffer缓冲流包装输入流
bufferedInputStream = new BufferedInputStream(inputStream);
//创建大小为1k的"水桶"
byte[] bytes = new byte[1024];
//用于保存实际读取的字节数
int hasRead = 0;
//循环取水
System.out.println("开始下载第" + i + "页的图片");
while ((hasRead = inputStream.read(bytes)) > 0) {
fileOutputStream.write(bytes, 0, hasRead);
}
System.out.println("第" + i + "页的图片下载完成");
//释放资源
fileOutputStream.close();
bufferedInputStream.close();
inputStream.close();
}
}
}
}
}
上效果图

做壁纸啥的是够了,当然也可以适当地多喝营养快线~~~