zoukankan      html  css  js  c++  java
  • jsoup爬取网站图片



    package com.ij34.JsoupTest;

    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.net.URLEncoder;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Random;

    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;

    /**
     * Small jsoup-based crawler: fetches one page, finds every {@code <img>} tag
     * and downloads the ones whose source URL contains ".jpg".
     */
    public class JsoupTest {

        /** Size of the copy buffer used while streaming an image to disk. */
        private static final int BUFFER_SIZE = 8192;

        /**
         * Downloads a single image and stores it in {@code filePath} under a
         * generated name of the form {@code yyyyMMddHHmmss<3 digits><extension>}.
         *
         * <p>The last path segment of {@code imgUrl} is URL-encoded (UTF-8) before
         * the request is made, so names containing spaces or non-ASCII characters
         * can be fetched.
         *
         * @param filePath directory the image is written to; created if missing
         * @param imgUrl   absolute URL of the image
         * @throws Exception if the directory cannot be created, the URL is malformed,
         *                   or the download / file write fails
         */
        public static void downImages(String filePath, String imgUrl) throws Exception {
            // Split the URL into "everything up to the last slash" and the image name.
            int lastSlash = imgUrl.lastIndexOf("/");
            String beforeUrl = imgUrl.substring(0, lastSlash + 1);
            String fileName = imgUrl.substring(lastSlash + 1);

            // URLEncoder produces form encoding, where a space becomes "+";
            // inside a URL path it has to be "%20" instead.
            String encodedName = URLEncoder.encode(fileName, "UTF-8").replace("+", "%20");
            URL url = new URL(beforeUrl + encodedName);

            // Create the target directory; isDirectory() covers the case where it
            // already exists (or was created concurrently) and mkdirs() returned false.
            File directory = new File(filePath);
            if (!directory.mkdirs() && !directory.isDirectory()) {
                throw new IOException("Cannot create directory: " + directory);
            }

            File target = newTargetFile(directory, extensionOf(fileName));

            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            // The input stream is opened first, so a failed request never leaves an
            // empty file behind; both streams are closed even when the copy fails.
            try (InputStream in = connection.getInputStream();
                 OutputStream out = new FileOutputStream(target)) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            } finally {
                connection.disconnect();
            }
        }

        /** Returns the extension of {@code fileName} including the dot (e.g. ".jpg"), or "" if it has none. */
        private static String extensionOf(String fileName) {
            int dot = fileName.lastIndexOf('.');
            return dot >= 0 ? fileName.substring(dot) : "";
        }

        /** Picks a timestamp-plus-random file name in {@code directory} that does not exist yet. */
        private static File newTargetFile(File directory, String extension) {
            SimpleDateFormat format = new SimpleDateFormat("yyyyMMddHHmmss");
            Random random = new Random();
            File candidate;
            do {
                // Timestamp is recomputed on every attempt so the loop cannot get
                // stuck if all suffixes for one second are already taken.
                String stamp = format.format(new Date());
                candidate = new File(directory, stamp + (random.nextInt(900) + 100) + extension);
            } while (candidate.exists());
            return candidate;
        }

        /**
         * Crawls a fixed gallery page and saves every JPG image it references.
         *
         * @param args unused
         * @throws Exception if the page cannot be fetched or any image download fails
         */
        public static void main(String[] args) throws Exception {
            // Page to crawl
            String url = "http://www.ivsky.com/tupian/laohu_v45527";
            String savePath = "D://webmagic//";

            Document document = Jsoup.connect(url).get();
            Elements elements = document.getElementsByTag("img");
            for (Element element : elements) {
                // "abs:src" resolves the src attribute against the page URL
                String imgSrc = element.attr("abs:src");
                // Only JPG images are downloaded
                if (imgSrc.contains(".jpg")) {
                    downImages(savePath, imgSrc);
                    System.out.println(url + ":" + imgSrc);
                }
            }
        }
    }

  • 相关阅读:
    【Win 10 应用开发】Toast通知激活应用——前台&后台
    MySQL深入理解
    mysql 索引中的USING BTREE 的意义
    关于PHP将对象数据写入日志的问题
    Golang学习笔记
    git flow 使用步骤
    git flow常用命令
    Nginx Log日志统计分析常用命令
    Nginx配置中的log_format用法梳理(设置详细的日志格式)
    回调函数的原理及PHP实例
  • 原文地址:https://www.cnblogs.com/tk55/p/8723757.html
Copyright © 2011-2022 走看看