zoukankan      html  css  js  c++  java
  • JAVA-替换html中图片的路径-从html代码中提取图片路径并下载(完整版)

    transHtml方法实现提取html中网络图片的路径,将图片重新下载到本地,并替换html中原来的路径
    package com.googosoft.until;
    
    import java.io.BufferedInputStream;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.HashMap;
    import java.util.LinkedHashSet;
    import java.util.List;
    import java.util.Locale;
    import java.util.Map;
    import java.util.Set;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import javax.servlet.http.HttpServletRequest;
    
    /**
     * @author sonyan
     * @version 2019年11月25日 下午2:12:32
     * @desc
     */
    public class Test {
    
        /**
         * 将html中的图片下载到服务器,并且使用服务器上图片的地址替换图片的网络路径
         * @param html 要处理的html
         * @param request
         * @param uploadFolder 服务器上保存图片的目录
         * @return
         */
        public static String transHtml(String html,HttpServletRequest request,String uploadFolder){
            List<String> imgList = getImgStrList(html,request);
            for (String imgStr : imgList) {
                try {
                    String newUrl = reSaveImage(imgStr, request,uploadFolder);
                    html = html.replace(imgStr, newUrl);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            return html;
        } 
        
        /**
         * 将指定的网络图片保存到本地指定目录
         * @param httpUrl 图片原来的网络路径
         * @param request
         * @param uploadFolder 服务器上保存图片的目录
         * @return httpUrl newPath
         */
        private static String reSaveImage(String httpUrl,  HttpServletRequest request,String uploadFolder){
            FileOutputStream out = null;
            BufferedInputStream in = null;
            HttpURLConnection connection = null;
            Map<String, Object> urlMap = new HashMap<>();
    
            byte[] buf = new byte[1024];
            int len = 0;
            try {
                URL url = new URL(httpUrl);
                connection = (HttpURLConnection) url.openConnection();
                connection.connect();
                in = new BufferedInputStream(connection.getInputStream());
                urlMap = getNewPath(httpUrl, request,uploadFolder);
                out = new FileOutputStream(urlMap.get("newPath").toString());
                while ((len = in.read(buf)) != -1) {
                    out.write(buf, 0, len);
                }
                out.flush();
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                try {
                    in.close();
                    out.close();
                    connection.disconnect();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
    
            return urlMap.get("newUrl").toString();
        }
    
        /**
         * 获取保存在服务器上的图片的实际存储地址以及访问地址
         * @param httpUrl 图片原来的网络路径
         * @param request
         * @param uploadFolder服务器上保存图片的目录
         * @return
         */
        private static Map<String, Object> getNewPath(String httpUrl, HttpServletRequest request,String uploadFolder) {
            Map<String, Object> relMap = new HashMap<>();
            String fileName = getFileName();
            String filefix = fileName.substring(fileName.lastIndexOf(".") + 1).toLowerCase();// 文件后缀
            fileName = UuidUtil.get32UUID() + "." + filefix;
            String prefix = getUrlPrefix(request) + "/iconimg/";
            relMap.put("newUrl", prefix + fileName);
            relMap.put("newPath", uploadFolder + fileName);
            return relMap;
        }
    
        /**
         * 设置图片的名称(时间+用户编号)
         * @return
         */
        private static String getFileName() {
            return "reload"+File.pathSeparator+new Date().getTime()+".jpg";
        }
    
        /**
         * 提取HTML字符串中的img
         * @param htmlStr 要处理的html字符串
         * @return
         */
        private static List<String> getImgStrList(String htmlStr,HttpServletRequest request) {
            List<String> list = new ArrayList<>();
            String img = "";
            Pattern p_image;
            Matcher m_image;
            String regEx_img = "<img.*src\s*=\s*(.*?)[^>]*?>";
            p_image = Pattern.compile(regEx_img, Pattern.CASE_INSENSITIVE);
            m_image = p_image.matcher(htmlStr);
            while (m_image.find()) {
                img = m_image.group();
                Matcher m = Pattern.compile("src\s*=\s*"?(.*?)("|>|\s+)").matcher(img);
                while (m.find()) {
                    String path = m.group(1);
                    if(!path.startsWith(getUrlPrefix(request))){
                        list.add(handleSrc(path));
                    }
                }
            }
            return list;
        }
    
        /**
         * 去除src路径中的前后引号
         * @param src 图片的src路径
         * @return
         */
        private static String handleSrc(String src) {
            if (src != null) {
                if (src.startsWith("'")|| src.startsWith(""")) {
                    return src.substring(1, src.length());
                }
                if (src.endsWith("'")|| src.endsWith(""")) {
                    return src.substring(0, src.length());
                }
            }
            return src;
        }
        
        /**
         * 获取网站的URL
         * @param request
         * @return 例如:http://192.168.11.3:8089
         */
        public static String getUrlPrefix(HttpServletRequest request) {
            StringBuffer str = new StringBuffer();
            str.append(request.getScheme());
            str.append("://");
            str.append(request.getServerName());
            if (80 != request.getServerPort()) {
                str.append(":" + request.getServerPort());
            }
            str.append(request.getContextPath());
            return str.toString();
        }
    }
  • 相关阅读:
    MapReduce之Map Join
    MapReduce之Reduce Join
    MapReduce清洗日志数据统计PV量
    Hadoop MapReduce自定义数据类型
    ES6 对象拓展方法
    ES6箭头函数与this指向
    ES6参数默认值,剩余参数及展开数组
    ES6模板字符串及字符串的扩展方法
    浅谈ES6数组及对象的解构
    ECMAScript概述及浅谈const,let与块级作用域
  • 原文地址:https://www.cnblogs.com/excellencesy/p/11927192.html
Copyright © 2011-2022 走看看