zoukankan      html  css  js  c++  java
  • Java 中 利用正则表达式 获取 网页图片



    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.net.URL;
    import java.net.URLConnection;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    /***
    * java抓取网络图片
    * @author swinglife
    *
    */
    /**
     * Downloads the images referenced by {@code <img>} tags of a web page.
     *
     * <p>The page is fetched, every {@code <img>} tag carrying a {@code src}
     * attribute is located with a regular expression, every absolute
     * {@code http://} or {@code https://} URL inside those tags is extracted,
     * and each URL is saved into {@link #DOWNLOAD_DIR}. Relative and
     * protocol-relative image paths are not resolved and are therefore skipped.
     */
    public class pimg {

        /** Address of the page to scan. */
        private static final String PAGE_URL = "http://www.csdn.net";
        /** Charset used to decode the page body. */
        private static final String ECODING = "UTF-8";
        /**
         * Regex for one complete img tag that has a src attribute. {@code [^>]}
         * keeps a match inside a single tag (several tags on one line are not
         * merged) and, unlike {@code .}, also spans line breaks inside a tag.
         */
        private static final String IMGURL_REG = "<img\\b[^>]*?\\bsrc\\s*=[^>]*>";
        /**
         * Regex for an absolute http(s) URL; the URL ends at the first quote,
         * {@code >} or whitespace character, none of which is part of the match.
         */
        private static final String IMGSRC_REG = "https?://[^\"'>\\s]+";
        /** Directory the images are written to; change to suit the local machine. */
        private static final String DOWNLOAD_DIR = "C:/Users/tutu/Desktop/img/";

        // Compiled once instead of once per call / per tag.
        private static final Pattern IMG_TAG_PATTERN =
                Pattern.compile(IMGURL_REG, Pattern.CASE_INSENSITIVE);
        private static final Pattern IMG_SRC_PATTERN =
                Pattern.compile(IMGSRC_REG, Pattern.CASE_INSENSITIVE);

        /**
         * Fetches {@link #PAGE_URL} and downloads every image found on it.
         *
         * @param args unused
         * @throws Exception if the page itself cannot be fetched
         */
        public static void main(String[] args) throws Exception {
            pimg cm = new pimg();
            // Fetch the HTML text of the page.
            String html = cm.getHTML(PAGE_URL);
            // Collect the img tags.
            List<String> imgUrl = cm.getImageUrl(html);
            // Collect the image addresses found in those tags.
            List<String> imgSrc = cm.getImageSrc(imgUrl);
            // Download the images.
            cm.Download(imgSrc);
        }

        /**
         * Fetches the body of a page and decodes it with {@link #ECODING}.
         *
         * <p>All bytes are collected before decoding so that a multi-byte
         * character split across two reads is decoded correctly, and only the
         * bytes actually read are kept.
         *
         * @param url address of the page
         * @return the decoded page body
         * @throws IOException if the URL is malformed or the page cannot be read
         */
        private String getHTML(String url) throws IOException {
            URLConnection connection = new URL(url).openConnection();
            try (InputStream in = connection.getInputStream();
                 ByteArrayOutputStream body = new ByteArrayOutputStream()) {
                byte[] buf = new byte[1024];
                int length;
                while ((length = in.read(buf)) != -1) {
                    body.write(buf, 0, length);
                }
                return body.toString(ECODING);
            }
        }

        /**
         * Extracts every img tag that has a src attribute.
         *
         * @param HTML page text; {@code null} is treated as an empty page
         * @return the matched tags in document order, never {@code null}
         */
        private List<String> getImageUrl(String HTML) {
            List<String> listImgUrl = new ArrayList<String>();
            if (HTML == null) {
                return listImgUrl;
            }
            Matcher matcher = IMG_TAG_PATTERN.matcher(HTML);
            while (matcher.find()) {
                listImgUrl.add(matcher.group());
            }
            return listImgUrl;
        }

        /**
         * Extracts the absolute http(s) URLs contained in the given img tags.
         *
         * @param listImageUrl img tags as returned by {@link #getImageUrl(String)}
         * @return every absolute URL found, in tag order, never {@code null}
         */
        private List<String> getImageSrc(List<String> listImageUrl) {
            List<String> listImgSrc = new ArrayList<String>();
            if (listImageUrl == null) {
                return listImgSrc;
            }
            for (String image : listImageUrl) {
                Matcher matcher = IMG_SRC_PATTERN.matcher(image);
                while (matcher.find()) {
                    // The pattern excludes the terminating quote, so no trimming is needed.
                    listImgSrc.add(matcher.group());
                }
            }
            return listImgSrc;
        }

        /**
         * Downloads every URL into {@link #DOWNLOAD_DIR}, creating the directory
         * when it does not exist. A failure is reported for the affected URL
         * only; the remaining URLs are still attempted.
         *
         * @param listImgSrc absolute image URLs
         */
        private void Download(List<String> listImgSrc) {
            File targetDir = new File(DOWNLOAD_DIR);
            if (!targetDir.isDirectory() && !targetDir.mkdirs()) {
                System.out.println("下载失败:" + targetDir);
                return;
            }
            for (String url : listImgSrc) {
                String imageName = fileNameFor(url);
                if (imageName.isEmpty()) {
                    // Nothing usable after the last '/', e.g. "http://host/dir/".
                    System.out.println("下载失败:" + url);
                    continue;
                }
                System.out.println("开始下载:" + url);
                try {
                    fetchTo(url, new File(targetDir, imageName));
                    System.out.println(imageName + "下载完成");
                } catch (IOException e) {
                    System.out.println("下载失败:" + url + " (" + e + ")");
                }
            }
        }

        /** Copies the content behind {@code url} into {@code target}, closing both streams. */
        private void fetchTo(String url, File target) throws IOException {
            try (InputStream in = new URL(url).openStream();
                 FileOutputStream fo = new FileOutputStream(target)) {
                byte[] buf = new byte[1024];
                int length;
                while ((length = in.read(buf)) != -1) {
                    fo.write(buf, 0, length);
                }
            }
        }

        /**
         * Derives a local file name from a URL: the text after the last '/',
         * without query string or fragment, with characters that are illegal
         * in Windows file names replaced by '_'.
         *
         * @return the file name, or an empty string when the URL yields none
         */
        private static String fileNameFor(String url) {
            int end = url.length();
            int query = url.indexOf('?');
            if (query >= 0) {
                end = query;
            }
            int fragment = url.indexOf('#');
            if (fragment >= 0 && fragment < end) {
                end = fragment;
            }
            String path = url.substring(0, end);
            String name = path.substring(path.lastIndexOf('/') + 1);
            if (name.equals(".") || name.equals("..")) {
                return "";
            }
            return name.replaceAll("[\\\\:*\"<>|]", "_");
        }

    }
  • 相关阅读:
    读后感悟
    使用java的循环单向链表解决约瑟夫问题
    稀疏数组
    java实现队列
    ASP.NET学习4. ASP.NET Ajax下POST完成后调用javascript函数
    ASP.NET学习3.前端和后台的相互调用
    ASP.NET学习1.使用“<% %>”嵌入代码
    ASP.NET学习2.弹出对话框的方法
    HTML5程序怎么打包成Windows Phone, iOS和Android的应用[转]
    PHP多文件上传个人理解总结 [转]
  • 原文地址:https://www.cnblogs.com/tutu21ybz/p/6737406.html
Copyright © 2011-2022 走看看