zoukankan      html  css  js  c++  java
  • jsoup爬取网站图片



    package
    com.ij34.JsoupTest; import java.io.File; import java.io.FileOutputStream; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLEncoder; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Random; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class JsoupTest { public static void downImages(String filePath,String imgUrl) throws Exception { //获取网址 String beforeUrl = imgUrl.substring(0,imgUrl.lastIndexOf("/")+1); //图片url后面的图片名字 String fileName = imgUrl.substring(imgUrl.lastIndexOf("/")+1); String newFileName = URLEncoder.encode(fileName, "UTF-8"); //"+"替换为UTF-8中的空格 newFileName = newFileName.replaceAll("\+", "\%20"); //编码之后的url imgUrl = beforeUrl + newFileName; //创建文件目录 File files = new File(filePath); if (!files.exists()) { files.mkdirs(); } URL url = new URL(imgUrl); HttpURLConnection connection = (HttpURLConnection)url.openConnection(); InputStream is = connection.getInputStream(); Date day=new Date(); SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmmss"); Random ra=new Random(); int Num=ra.nextInt(11)+100; String fn=df.format(day)+Num; //去图片的格式例如.jpg .jpeg int lastIndex=fileName.lastIndexOf("."); String result=fileName.substring(lastIndex); File file = new File(filePath +fn+ result); FileOutputStream out = new FileOutputStream(file); int i = 0; while((i = is.read()) != -1){ out.write(i); } } public static void main(String[] args) throws Exception { //int[] a=new int[]{}; //for(int i=a.length-1;i>=0;i--){ //爬取的网址 String url = "http://www.ivsky.com/tupian/laohu_v45527";//+a[i]; String savePath = "D://webmagic//"; Document document = Jsoup.connect(url).get(); Elements elements = document.getElementsByTag("img"); for(Element element : elements){ //图片的绝对路径 String imgSrc = element.attr("abs:src"); //取jpg格式 if(imgSrc.contains(".jpg")){ downImages(savePath, imgSrc); System.out.println(url+":"+imgSrc); } } // } } }

  • 相关阅读:
    轻松搭建基于 SpringBoot + Vue 的 Web 商城应用
    Serverless 实战 —— Funcraft + OSS + ROS 进行 CI/CD
    急速搭建 Serverless AI 应用:为你写诗
    O'Reilly 1500 份问卷调研:2019 年 Serverless 落地到底香不香?
    2019 阿里巴巴云原生这一年
    快速部署 Spring PetClinic 到函数计算平台
    1354. Construct Target Array With Multiple Sums
    1352. Product of the Last K Numbers
    1351. Count Negative Numbers in a Sorted Matrix
    1347. Minimum Number of Steps to Make Two Strings Anagram
  • 原文地址:https://www.cnblogs.com/tk55/p/8723757.html
Copyright © 2011-2022 走看看