zoukankan      html  css  js  c++  java
  • JAVA抓取网页图片并下载到本地

    package com.yong.util;
    
    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.net.URL;
    import java.net.URLConnection;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Downloads the images referenced by the {@code <img>} tags of a web page.
     *
     * <p>The page at {@link #PAGE_URL} is fetched and decoded, its {@code <img>} tags are
     * located with a regular expression, every absolute {@code http}/{@code https} URL found
     * inside those tags is collected, and each one is saved into the current working
     * directory under the last path segment of its URL.
     *
     * <p>Relative and protocol-relative ({@code //host/...}) image addresses are not resolved
     * and are therefore skipped.
     *
     * @author swinglife
     */
    public class CatchImage {

        /** Page that is scanned for images. */
        private static final String PAGE_URL = "http://www.baidu.com";

        /** Name of the charset used to decode the downloaded page. */
        private static final String ENCODING = "UTF-8";

        /** Size in bytes of the buffer used when copying streams. */
        private static final int BUFFER_SIZE = 8192;

        /**
         * Matches one complete {@code <img ...>} tag that has a {@code src} attribute.
         * {@code [^>]} keeps a match inside a single tag even when several tags share a line.
         */
        private static final Pattern IMG_TAG =
                Pattern.compile("<img\\b[^>]*?\\bsrc\\s*=[^>]*>", Pattern.CASE_INSENSITIVE);

        /**
         * Matches an absolute http(s) URL; the URL ends at the first quote, {@code >} or
         * whitespace character, none of which is part of the match.
         */
        private static final Pattern IMG_SRC =
                Pattern.compile("https?:[^\"'>\\s]+", Pattern.CASE_INSENSITIVE);

        /**
         * Fetches {@link #PAGE_URL}, extracts its image URLs and downloads them.
         *
         * @param args unused
         * @throws Exception if the page itself cannot be fetched or decoded
         */
        public static void main(String[] args) throws Exception {
            CatchImage cm = new CatchImage();
            // Fetch the HTML text of the page.
            String html = cm.getHTML(PAGE_URL);
            // Collect the <img> tags.
            List<String> imgTags = cm.getImageUrl(html);
            // Collect the image addresses found in those tags.
            List<String> imgSrc = cm.getImageSrc(imgTags);
            // Download the images.
            cm.Download(imgSrc);
        }

        /**
         * Downloads the resource at {@code url} and decodes it as {@link #ENCODING} text.
         *
         * @param url address of the page to fetch
         * @return the complete decoded content
         * @throws Exception if the URL is malformed or the content cannot be read
         */
        private String getHTML(String url) throws Exception {
            URLConnection connection = new URL(url).openConnection();
            try (InputStream in = connection.getInputStream();
                    ByteArrayOutputStream bytes = new ByteArrayOutputStream()) {
                byte[] buf = new byte[BUFFER_SIZE];
                int length;
                while ((length = in.read(buf)) != -1) {
                    // Copy only the bytes actually read in this pass.
                    bytes.write(buf, 0, length);
                }
                // Decode once at the end so that a multi-byte character split across two
                // reads is still decoded correctly.
                return bytes.toString(ENCODING);
            }
        }

        /**
         * Extracts every {@code <img>} tag that carries a {@code src} attribute.
         *
         * @param HTML the HTML text to scan
         * @return the matched tags in document order; empty if there are none
         */
        private List<String> getImageUrl(String HTML) {
            List<String> listImgUrl = new ArrayList<String>();
            Matcher matcher = IMG_TAG.matcher(HTML);
            while (matcher.find()) {
                listImgUrl.add(matcher.group());
            }
            return listImgUrl;
        }

        /**
         * Extracts every absolute http(s) URL contained in the given {@code <img>} tags.
         *
         * @param listImageUrl the {@code <img>} tags to scan
         * @return the URLs in encounter order, without surrounding quotes; empty if none
         */
        private List<String> getImageSrc(List<String> listImageUrl) {
            List<String> listImgSrc = new ArrayList<String>();
            for (String image : listImageUrl) {
                Matcher matcher = IMG_SRC.matcher(image);
                while (matcher.find()) {
                    listImgSrc.add(matcher.group());
                }
            }
            return listImgSrc;
        }

        /**
         * Downloads each URL into the current working directory.
         *
         * <p>A failure is reported on standard output and does not prevent the remaining
         * URLs from being downloaded.
         *
         * @param listImgSrc the image URLs to download
         */
        private void Download(List<String> listImgSrc) {
            for (String url : listImgSrc) {
                String imageName = fileNameFor(url);
                if (imageName.isEmpty()) {
                    // Nothing usable after the last '/', so there is no file to write to.
                    System.out.println("下载失败" + ": " + url);
                    continue;
                }
                System.out.println("开始下载:" + url);
                try (InputStream in = new URL(url).openStream();
                        FileOutputStream fo = new FileOutputStream(new File(imageName))) {
                    byte[] buf = new byte[BUFFER_SIZE];
                    int length;
                    while ((length = in.read(buf)) != -1) {
                        fo.write(buf, 0, length);
                    }
                } catch (IOException e) {
                    System.out.println("下载失败" + ": " + url + " (" + e + ")");
                    continue;
                }
                System.out.println(imageName + "下载完成");
            }
        }

        /**
         * Derives a local file name from the text after the last {@code '/'} of {@code url}.
         *
         * @param url the image URL
         * @return the file name with characters that are illegal on common file systems
         *         replaced by {@code '_'}, or an empty string if no usable name remains
         */
        private static String fileNameFor(String url) {
            String name = url.substring(url.lastIndexOf('/') + 1);
            if (".".equals(name) || "..".equals(name)) {
                return "";
            }
            return name.replaceAll("[\\\\:*?\"<>|]", "_");
        }
    }
    

  • 相关阅读:
    HihoCoder 1245:王胖浩与三角形 三角形边长与面积
    C++ 读写注册表
    Codestorm:Counting Triangles 查各种三角形的个数
    2015年10月之 叽里咕噜
    HDU 5523:Game
    Codestorm:Game with a Boomerang
    关于GPU-driver for linux的资料
    ACER NV47H75C 安装CUDA 驱动以及调整屏幕
    服务器GTX590安装CUDA
    观后感,读了几篇博文
  • 原文地址:https://www.cnblogs.com/whzhaochao/p/5023412.html
Copyright © 2011-2022 走看看