zoukankan      html  css  js  c++  java
  • 抓取页面图片元素并保存到本机电脑

    这里先用 Java 模拟浏览器访问页面,获取页面的 HTML 内容,再用 jsoup 解析 HTML 并提取其中的图片元素,

    最后通过输入/输出流读取图片数据并保存到本机。

    package getpicture;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.OutputStreamWriter;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Scanner;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
     
    
    /**
     * Program entry point: runs a single {@link Spider} on its own thread.
     */
    public class getPicture {

        /**
         * Creates the spider task and starts it on a new thread.
         *
         * @param args command-line arguments (not used)
         */
        public static void main(String[] args) {
            Runnable spiderTask = new Spider();
            Thread worker = new Thread(spiderTask);
            worker.start();
        }
    }
     
    /**
     * Fetches one listing page, extracts the image addresses from it and starts
     * one {@link DownloadImage} thread per image.
     */
    class Spider implements Runnable {
        private static final String USER_AGENT =
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36";

        // The page address is built as firstUrl + beginIndex + connUrl,
        // e.g. http://jandan.net/ooxx/page-1115#comments
        private String firstUrl = "http://jandan.net/ooxx/page-";
        private String connUrl = "#comments";
        private int beginIndex = 1115;
        /** HTML of the most recently fetched page; stays null until a fetch succeeds. */
        private String preHtml;

        public Spider() {
        }

        /**
         * Downloads the page, prints its HTML, and starts a download thread for
         * every element matched by the CSS selector {@code .row img}. Any failure
         * is reported with a stack trace and ends the run.
         */
        @Override
        public void run() {
            try {
                URL pageUrl = new URL(firstUrl + beginIndex + connUrl);
                HttpURLConnection conn = (HttpURLConnection) pageUrl.openConnection();
                conn.setRequestProperty("Connection", "keep-alive");
                conn.setRequestProperty("User-Agent", USER_AGENT);
                // No request body is opened, so this stays a plain GET request.

                // Check the status before touching the body so a failed request is
                // reported clearly instead of being parsed as if it were the page.
                int status = conn.getResponseCode();
                if (status != HttpURLConnection.HTTP_OK) {
                    System.err.println("Unexpected HTTP status " + status + " for " + pageUrl);
                    return;
                }

                preHtml = readAll(conn.getInputStream());
                System.out.println(preHtml);

                Document doc = Jsoup.parse(preHtml);
                Elements elements = doc.select(".row img");
                for (Element e : elements) {
                    String imgSrc = e.attr("src");
                    if (imgSrc.isEmpty()) {
                        continue; // nothing to download for an <img> without a src
                    }
                    new Thread(new DownloadImage(imgSrc)).start();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        /**
         * Reads the whole stream as UTF-8 text and closes it.
         *
         * @param in stream to drain; closed before this method returns
         * @return the text, with each line terminated by {@code '\n'} so that
         *         markup split across lines is not fused together
         * @throws IOException if reading fails
         */
        private static String readAll(InputStream in) throws IOException {
            StringBuilder html = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, "utf-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    html.append(line).append('\n');
                }
            }
            return html.toString();
        }
    }
     
    
    /**
     * Downloads a single image and stores it under
     * {@code E:\picture\<yyyyMMdd>\<file name>}.
     */
    class DownloadImage implements Runnable {
        /** Root folder for downloads; one sub-folder per day is created below it. */
        private static final String SAVE_ROOT = "E:\\picture";
        private static final String USER_AGENT =
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36";

        private final String imageSrc;
        private String imageName;

        /**
         * @param imageSrc value of the image's {@code src} attribute, either
         *                 protocol-relative ({@code //host/path}) or with an
         *                 explicit {@code http(s)://} scheme
         */
        public DownloadImage(String imageSrc) {
            this.imageSrc = imageSrc;
        }

        /**
         * Downloads the image to the dated folder. A src that yields no usable
         * file name is skipped. On failure the error is reported and any partially
         * written file is removed.
         */
        @Override
        public void run() {
            imageName = fileNameOf(imageSrc);
            if (imageName.isEmpty()) {
                System.err.println("Skipping image with unusable src: " + imageSrc);
                return;
            }

            // The folder name is the formatted date, not the formatter object itself.
            String day = new SimpleDateFormat("yyyyMMdd").format(new Date());
            File file = new File(new File(SAVE_ROOT, day), imageName);
            File dir = file.getParentFile();
            if (!dir.exists()) {
                // A failure here surfaces below when the output file cannot be opened.
                dir.mkdirs();
            }

            System.out.println("开始下载图片:" + imageName);
            try {
                URL newURL = new URL(toAbsoluteUrl(imageSrc));
                HttpURLConnection conn = (HttpURLConnection) newURL.openConnection();
                conn.setRequestProperty("Connection", "keep-alive");
                conn.setRequestProperty("User-Agent", USER_AGENT);

                // Both streams are closed even when the copy fails half-way.
                try (InputStream inputStream = conn.getInputStream();
                     FileOutputStream fos = new FileOutputStream(file)) {
                    byte[] data = new byte[1024];
                    int len;
                    while ((len = inputStream.read(data)) != -1) {
                        fos.write(data, 0, len);
                    }
                }
                System.out.println("下载完成:" + imageName);
            } catch (IOException | RuntimeException e) {
                System.err.println(" 这个图片下载不了哇!\n删除妹子" + imageName);
                e.printStackTrace();
                // Do not leave an empty or truncated image behind.
                if (file.exists() && !file.delete()) {
                    System.err.println("Could not delete partial file: " + file);
                }
            }
        }

        /**
         * Turns an image src into a URL string that can be opened.
         *
         * @param src non-null src attribute value
         * @return {@code src} unchanged when it already starts with
         *         {@code http://} or {@code https://}; otherwise {@code "http:" + src}
         */
        static String toAbsoluteUrl(String src) {
            String trimmed = src.trim();
            boolean hasScheme = trimmed.regionMatches(true, 0, "http://", 0, 7)
                    || trimmed.regionMatches(true, 0, "https://", 0, 8);
            return hasScheme ? trimmed : "http:" + trimmed;
        }

        /**
         * Derives a local file name from the last path segment of an image src.
         * Query string and fragment are dropped, and characters that are not
         * allowed in Windows file names are replaced with {@code '_'}.
         *
         * @param src src attribute value, may be {@code null}
         * @return the file name, or an empty string when none can be derived
         *         (null/blank src, no path segment, or a {@code .}/{@code ..} segment)
         */
        static String fileNameOf(String src) {
            if (src == null) {
                return "";
            }
            String path = src.trim();
            int query = path.indexOf('?');
            if (query >= 0) {
                path = path.substring(0, query);
            }
            int fragment = path.indexOf('#');
            if (fragment >= 0) {
                path = path.substring(0, fragment);
            }
            while (path.endsWith("/")) {
                path = path.substring(0, path.length() - 1);
            }
            String name = path.substring(path.lastIndexOf('/') + 1);
            name = name.replaceAll("[\\\\:*?\"<>|]", "_");
            if (name.equals(".") || name.equals("..")) {
                return "";
            }
            return name;
        }
    }
    View Code
  • 相关阅读:
    andrew ng 学习
    360一些笔试题
    安装visual studio2010提示“Windows Installer 服务不可用”的解决办法
    算法学习从赌钱游戏看PageRank算法
    jQuery Masonry 一个 jQuery动态网格布局的插件
    国内HTML5前端开发框架汇总
    Windows Performance Monitor 学习笔记
    ThinkPad预装win8系统机型安装win7系统的操作指导
    jQuery的Ajax的自动完成功能控件
    JavaScript的Forms验证Parsley.js
  • 原文地址:https://www.cnblogs.com/feitianshaoxai/p/6595381.html
Copyright © 2011-2022 走看看