  • Crawling website images and saving them locally

      

      Sometimes a crawler needs to save a site's images to the local disk. To do that, we first obtain each image's URL and then use that URL to download the image.

      Two simple approaches are described below:

    1. Using the JDK's built-in URLConnection

      With this approach I have not yet found a way to attach cookies or other request data when downloading an image.

    package cn.qlq.craw.Jsoup;
    
    import java.io.FileOutputStream;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLConnection;
    
    /**
     * Fetch an image from a URL and save it to the local disk.
     * 
     * @author liqiang
     *
     */
    public class UrlConnectionGetPicture {
        
        public static void main(String[] args) throws Exception {
            String url  = "http://qiaoliqiang.cn/fileDown/zfb.bmp";
            URL url1 = new URL(url);
            URLConnection conn = url1.openConnection();
            InputStream inputStream = conn.getInputStream();
            String path = "C:\\Users\\liqiang\\Desktop\\实习\\python\\javaCrawPicture\\test.bmp";
            OutputStream out = new FileOutputStream(path);
            // copy the response body to the local file in 1 KB chunks
            byte[] buff = new byte[1024];
            int i = -1;
            while ((i = inputStream.read(buff)) != -1) {
                out.write(buff, 0, i);
            }
            inputStream.close();
            out.close();
        }
        
    }
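
      That said, URLConnection does expose setRequestProperty for adding request headers, so a Cookie header can in principle be attached the same way. The following is only an untested sketch under that assumption; the cookie value and the output path are placeholders, not taken from the original code.

    package cn.qlq.craw.Jsoup;
    
    import java.io.FileOutputStream;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.URL;
    import java.net.URLConnection;
    
    /**
     * Sketch: attach request headers (e.g. a Cookie) via URLConnection.setRequestProperty.
     * The cookie value below is a placeholder.
     */
    public class UrlConnectionWithHeaderSketch {
        public static void main(String[] args) throws Exception {
            URLConnection conn = new URL("http://qiaoliqiang.cn/fileDown/zfb.bmp").openConnection();
            conn.setRequestProperty("Cookie", "JSESSIONID=xxxx"); // placeholder cookie value
            conn.setRequestProperty("User-Agent", "Mozilla/5.0"); // optional extra header
            try (InputStream in = conn.getInputStream();
                    OutputStream out = new FileOutputStream("test.bmp")) {
                byte[] buff = new byte[1024];
                int n;
                while ((n = in.read(buff)) != -1) {
                    out.write(buff, 0, n);
                }
            }
        }
    }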

    Addendum: org.apache.commons.io.IOUtils can copy an InputStream straight into an OutputStream in a single call, which makes the file copy trivial. For example:

      Only two arguments are needed: pass the InputStream first and the OutputStream second.

    package cn.qlq.craw.Jsoup;
    
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.URL;
    import java.net.URLConnection;
    
    import org.apache.commons.io.IOUtils;
    
    public class IOutilsDownloadFile {
        public static void main(String[] args) throws IOException {
            String url = "http://qiaoliqiang.cn/fileDown/zfb.bmp";
            URL url1 = new URL(url);
            URLConnection conn = url1.openConnection();
            InputStream inputStream = conn.getInputStream();
            String path = "C:\\Users\\liqiang\\Desktop\\test.bmp";
            OutputStream outputStream = new FileOutputStream(path);
            // copy the whole input stream to the output stream with IOUtils
            IOUtils.copy(inputStream, outputStream);
            // IOUtils.copy does not close the streams, so close them explicitly
            inputStream.close();
            outputStream.close();
        }
    }
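
      If you prefer not to close the streams by hand, the same copy can be written with try-with-resources. This is just a minimal variant under the same URL assumption, writing to a placeholder relative path.

    package cn.qlq.craw.Jsoup;
    
    import java.io.FileOutputStream;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.URL;
    
    import org.apache.commons.io.IOUtils;
    
    public class IOutilsDownloadFileTryWithResources {
        public static void main(String[] args) throws Exception {
            try (InputStream in = new URL("http://qiaoliqiang.cn/fileDown/zfb.bmp").openStream();
                    OutputStream out = new FileOutputStream("test.bmp")) {
                // both streams are closed automatically when the block exits
                IOUtils.copy(in, out);
            }
        }
    }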

     2. Using Jsoup. This approach can carry cookies or other extra data while downloading the image (important).

     Below is the version that does not carry cookies:

    package cn.qlq.craw.Jsoup;
    
    import java.io.BufferedOutputStream;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import org.jsoup.Connection;
    import org.jsoup.Jsoup;
    /**
     * Download an image with Jsoup and save it to the local disk.
     * @author liqiang
     *
     */
    public class JsoupDoloadPicture {
    
        /**
         * @param args
         * @throws IOException 
         */
        public static void main(String[] args) throws IOException {
            String imageSrc = "http://newjwc.tyust.edu.cn/CheckCode.aspx";
            // ignoreContentType(true) lets Jsoup return non-HTML content such as an image
            Connection.Response response = Jsoup.connect(imageSrc).ignoreContentType(true).execute();
            byte[] img = response.bodyAsBytes();
            System.out.println(img.length);
            savaImage(img, "C:\\Users\\liqiang\\Desktop\\实习\\python\\javaCrawJWXT", "test.png");
        }
    
        public static void savaImage(byte[] img, String filePath, String fileName) {
            BufferedOutputStream bos = null;
            FileOutputStream fos = null;
            File dir = new File(filePath);
            try {
                // create the target directory if it does not exist yet
                if (!dir.exists()) {
                    dir.mkdirs();
                }
                File file = new File(filePath + "\\" + fileName);
                fos = new FileOutputStream(file);
                bos = new BufferedOutputStream(fos);
                bos.write(img);
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                if (bos != null) {
                    try {
                        bos.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
                if (fos != null) {
                    try {
                        fos.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }

     Below is the version that carries cookies. It lets the crawler fetch, for example, a captcha image while holding a site's login session. A hypothetical usage sketch follows the class.

    package cn.qlq.craw.JsoupCrawJWXT;
    
    import java.io.BufferedOutputStream;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.util.Map;
    
    import org.jsoup.Connection;
    import org.jsoup.Jsoup;
    
    /**
     * Download an image with Jsoup (carrying cookies) and save it to the local disk.
     * 
     * @author liqiang
     *
     */
    public class JsoupDoloadPicture {
    
        /**
         * Download the captcha image, carrying the given cookies.
         * 
         * @param url
         * @param cookies
         * @throws IOException
         */
        public static void downloadImg(String url, Map<String, String> cookies) throws IOException {
            Connection connect = Jsoup.connect(url);
            connect.cookies(cookies); // send the session cookies along with the request
            connect.timeout(5 * 10000); // 50-second timeout
            Connection.Response response = connect.ignoreContentType(true).execute();
            byte[] img = response.bodyAsBytes();
            System.out.println(img.length);
            // read the target directory from the configuration helper
            String directory = ResourcesUtil.getValue("path", "file");
            savaImage(img, directory, "yzm.png");
        }
    
        public static void savaImage(byte[] img, String filePath, String fileName) {
            BufferedOutputStream bos = null;
            FileOutputStream fos = null;
            File dir = new File(filePath);
            try {
                // create the target directory if it does not exist yet
                if (!dir.exists()) {
                    dir.mkdirs();
                }
                File file = new File(filePath + "\\" + fileName);
                fos = new FileOutputStream(file);
                bos = new BufferedOutputStream(fos);
                bos.write(img);
                System.out.println("Captcha image saved to: " + filePath);
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                if (bos != null) {
                    try {
                        bos.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
                if (fos != null) {
                    try {
                        fos.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }
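
      For completeness, here is a hypothetical sketch of how downloadImg might be called: log in with Jsoup, take the session cookies from the response, and pass them on. The login URL and form field names are placeholders, not taken from the original code, and ResourcesUtil is assumed to be configured as above.

    package cn.qlq.craw.JsoupCrawJWXT;
    
    import java.io.IOException;
    import java.util.Map;
    
    import org.jsoup.Connection;
    import org.jsoup.Jsoup;
    
    /**
     * Hypothetical usage sketch: obtain session cookies from a login request,
     * then reuse them to download the captcha image.
     */
    public class DownloadImgUsageSketch {
        public static void main(String[] args) throws IOException {
            // placeholder login request; the URL and field names depend on the actual site
            Connection.Response login = Jsoup.connect("http://example.com/login")
                    .data("username", "user")
                    .data("password", "pass")
                    .method(Connection.Method.POST)
                    .execute();
            Map<String, String> cookies = login.cookies(); // cookies set by the server for this session
            // reuse the session cookies to fetch the captcha image
            JsoupDoloadPicture.downloadImg("http://example.com/CheckCode.aspx", cookies);
        }
    }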