zoukankan      html  css  js  c++  java
  • Java捕获一个网站页面的全部图片

    直接上代码:

    package com.jeecg.util;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.net.URLConnection;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Downloads the images referenced by {@code <img>} tags of a single web page.
     *
     * <p>The work is done in four steps: fetch the page HTML, collect the
     * {@code <img>} tags, pull the absolute image addresses out of those tags,
     * and finally save every image into {@link #SAVE_DIR} as
     * {@code <index><extension>} (for example {@code 0.jpg}, {@code 1.png}).
     */
    public class CatchImage {
        /** Address of the page whose images are downloaded by {@code main}. */
        private static final String URL = "http://news.163.com/";

        /** Character encoding used to decode the page body. */
        private static final String ECODING = "UTF-8";

        /** Directory the downloaded images are written to. */
        private static final String SAVE_DIR = "D:/imgPage/";

        /** Matches one complete img tag, whatever the attribute order or letter case. */
        private static final Pattern IMG_TAG_PATTERN =
                Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);

        /**
         * Matches an absolute http/https address that ends in a known image
         * extension and is immediately followed by the closing quote of the
         * attribute. Group 1 is the address without that quote. The lazy
         * quantifier keeps two addresses inside one tag from being merged.
         */
        private static final Pattern IMG_SRC_PATTERN = Pattern.compile(
                "(https?:[^\"'\\s<>]+?\\.(?:jpeg|jpg|png|gif))[\"']", Pattern.CASE_INSENSITIVE);

        /**
         * Fetches {@link #URL}, extracts its image addresses and downloads them.
         *
         * @param args unused
         * @throws Exception if the page or any image cannot be read or written
         */
        public static void main(String[] args) throws Exception {
            CatchImage cm = new CatchImage();

            // Fetch the HTML text of the page.
            String html = cm.getHTML(URL);

            // Collect the img tags.
            List<String> imgUrl = cm.getImageUrl(html);

            // Extract the image addresses from the tags.
            List<String> imgSrc = cm.getImageSrc(imgUrl);

            // Download the images.
            cm.Download(imgSrc);
        }

        /**
         * Reads the body of a page with an HTTP GET request.
         *
         * <p>Lines are concatenated without line separators. Connect and read
         * timeouts are both two seconds.
         *
         * @param oldLink address of the page
         * @return the page body, or an empty string when the response code is not 200
         * @throws Exception if the address is malformed or the request fails
         **/
        private String getHTML(String oldLink) throws Exception {
            StringBuilder sb = new StringBuilder();
            HttpURLConnection connection = (HttpURLConnection) new URL(oldLink).openConnection();
            try {
                connection.setRequestMethod("GET");
                connection.setConnectTimeout(2000);
                connection.setReadTimeout(2000);
                if (connection.getResponseCode() == HttpURLConnection.HTTP_OK) {
                    // try-with-resources closes the reader (and the underlying stream)
                    // even when reading fails half-way.
                    try (BufferedReader reader = new BufferedReader(
                            new InputStreamReader(connection.getInputStream(), ECODING))) {
                        String line;
                        while ((line = reader.readLine()) != null) {
                            sb.append(line);
                        }
                    }
                }
            } finally {
                connection.disconnect();
            }
            return sb.toString();
        }

        /**
         * Collects every {@code <img ...>} tag found in the given HTML.
         *
         * @param HTML page source; {@code null} is treated as empty
         * @return the matched tags in document order, never {@code null}
         */
        private List<String> getImageUrl(String HTML) {
            List<String> listImgUrl = new ArrayList<>();
            if (HTML == null) {
                return listImgUrl;
            }
            Matcher matcher = IMG_TAG_PATTERN.matcher(HTML);
            while (matcher.find()) {
                listImgUrl.add(matcher.group());
            }
            return listImgUrl;
        }

        /**
         * Extracts the absolute image addresses contained in the given tags.
         *
         * <p>Only quoted http/https addresses ending in jpeg, jpg, png or gif are
         * returned. A tag may contribute several addresses.
         *
         * @param listImageUrl img tags as returned by {@code getImageUrl}
         * @return the image addresses, without surrounding quotes, in encounter order
         **/
        private List<String> getImageSrc(List<String> listImageUrl) {
            List<String> listImgSrc = new ArrayList<>();
            for (String image : listImageUrl) {
                Matcher matcher = IMG_SRC_PATTERN.matcher(image);
                while (matcher.find()) {
                    listImgSrc.add(matcher.group(1));
                }
            }
            return listImgSrc;
        }

        /**
         * Downloads every address into {@link #SAVE_DIR}, creating the directory
         * when it does not exist yet.
         *
         * <p>The file name is the zero-based position of the address in the list
         * followed by the extension taken from that same address, so names can
         * never get out of step with the addresses.
         *
         * @param listImgSrc absolute image addresses
         * @throws Exception if the directory cannot be created or a download fails
         **/
        private void Download(List<String> listImgSrc) throws Exception {
            File saveDir = new File(SAVE_DIR);
            if (!saveDir.isDirectory() && !saveDir.mkdirs()) {
                throw new FileNotFoundException("Cannot create output directory: " + SAVE_DIR);
            }

            int count = 0;
            byte[] buf = new byte[1024];
            for (String url : listImgSrc) {
                System.out.println(url);
                File target = new File(saveDir, count + extensionOf(url));
                // Both streams are closed automatically, also when the copy fails.
                try (InputStream in = new URL(url).openStream();
                        FileOutputStream fo = new FileOutputStream(target)) {
                    System.out.println("开始下载:" + url);
                    int length;
                    while ((length = in.read(buf, 0, buf.length)) != -1) {
                        fo.write(buf, 0, length);
                    }
                }
                System.out.println("下载完成");
                count++;
            }
            System.out.println(count);
        }

        /**
         * Returns the file extension of an address, including the leading dot.
         *
         * @param url image address
         * @return the text starting at the last {@code '.'} that follows the last
         *         {@code '/'}, or an empty string when there is no such dot
         */
        private static String extensionOf(String url) {
            int dot = url.lastIndexOf('.');
            int slash = url.lastIndexOf('/');
            return dot > slash ? url.substring(dot) : "";
        }
    }

     

  • 相关阅读:
    [POI2013]LUK-Triumphal arch
    [CF1149C](Tree Generator)
    NOI2018归程
    [CF191](Fools and Roads)
    [CF700E](Cool Slogans)
    我石乐志
    想题的时候不要颓废
    人不能忘耻
    反思
    中考加油!
  • 原文地址:https://www.cnblogs.com/shuilangyizu/p/11157333.html
Copyright © 2011-2022 走看看