zoukankan      html  css  js  c++  java
  • JAVA导出下载word文档(导出带富文本图片word)

    要导出带富文本图片的 Word 文档,首先引入 jsoup 依赖,用它解析并处理富文本中的 HTML 代码:

    <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.7.3</version>
    </dependency> 

    以下是几个必要的文件:

    RichHtmlHandler.java

    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.UUID;
    
    import org.apache.commons.lang3.StringUtils;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    /**
     * @Description:富文本Html处理器,主要处理图片及编码
     * 
     */
    public class RichHtmlHandler {
    
        private Document doc = null;
        private String html;
    
        private String docSrcParent = "paper.files";     //在paper.ftl文件里面找到,检索“Content-Location”
        private String docSrcLocationPrex = "file:///C:/D1324D12";   //在paper.ftl文件里面找到,检索“Content-Location”
        private String nextPartId = "01D2EB53.503F62F0";     //在paper.ftl文件里面找到,最末行
        private String shapeidPrex = "_x56fe__x7247__x0020";
        private String spidPrex = "_x0000_i";
        private String typeid = "#_x0000_t75";
    
        private String handledDocBodyBlock;
        private List<String> docBase64BlockResults = new ArrayList<String>();
        private List<String> xmlImgRefs = new ArrayList<String>();
        
        private String srcPath = "";
        
        public RichHtmlHandler(){}
    
        public String getDocSrcLocationPrex() {
            return docSrcLocationPrex;
        }
    
        public void setDocSrcLocationPrex(String docSrcLocationPrex) {
            this.docSrcLocationPrex = docSrcLocationPrex;
        }
    
        public String getNextPartId() {
            return nextPartId;
        }
    
        public void setNextPartId(String nextPartId) {
            this.nextPartId = nextPartId;
        }
    
        public String getHandledDocBodyBlock() {
            String raw=   WordHtmlGeneratorHelper.string2Ascii(doc.getElementsByTag("body").html());
            return raw.replace("=3D", "=").replace("=", "=3D");
        }
        
        public String getRawHandledDocBodyBlock() {
            String raw=  doc.getElementsByTag("body").html();
            return raw.replace("=3D", "=").replace("=", "=3D");
        }
        public List<String> getDocBase64BlockResults() {
            return docBase64BlockResults;
        }
    
        public List<String> getXmlImgRefs() {
            return xmlImgRefs;
        }
    
        public String getShapeidPrex() {
            return shapeidPrex;
        }
    
        public void setShapeidPrex(String shapeidPrex) {
            this.shapeidPrex = shapeidPrex;
        }
    
        public String getSpidPrex() {
            return spidPrex;
        }
    
        public void setSpidPrex(String spidPrex) {
            this.spidPrex = spidPrex;
        }
    
        public String getTypeid() {
            return typeid;
        }
    
        public void setTypeid(String typeid) {
            this.typeid = typeid;
        }
    
        public String getDocSrcParent() {
            return docSrcParent;
        }
    
        public void setDocSrcParent(String docSrcParent) {
            this.docSrcParent = docSrcParent;
        }
    
        public String getHtml() {
            return html;
        }
    
        public void setHtml(String html) {
            this.html = html;
        }
    
        public RichHtmlHandler(String html, String srcPath) {
            this.html = html;
            this.srcPath = srcPath;
            doc = Jsoup.parse(wrappHtml(this.html));
            try {
                handledHtml(false);
            } catch (IOException e) {
                
                e.printStackTrace();
            }
        }
        
        public void re_init(String html){
            doc=null;
            doc = Jsoup.parse(wrappHtml(html));
            docBase64BlockResults.clear();
            xmlImgRefs.clear();
        }
        
        /**
         * @Description: 获得已经处理过的HTML文件
         * @param @return
         * @return String
         * @throws IOException 
         * @throws
         */
        public void handledHtml(boolean isWebApplication)
                throws IOException {
            Elements imags = doc.getElementsByTag("img");
            System.out.println("doc:
    "+doc);
            if (imags == null || imags.size() == 0) {
                // 返回编码后字符串
                return;
                //handledDocBodyBlock = WordHtmlGeneratorHelper.string2Ascii(html);
            }
    
            // 转换成word mht 能识别图片标签内容,去替换html中的图片标签
    
            for (Element item : imags) {
                // 把文件取出来
                String src = item.attr("src");
                String srcRealPath = srcPath + src;
                
    //            String thepaths = RichHtmlHandler.class.getClassLoader().getResource("").toString();
    //            System.out.println("src="+src+"     thepaths="+thepaths);
                if (isWebApplication) {
    //                String contentPath=RequestResponseContext.getRequest().getContextPath();
    //                if(!StringUtils.isEmpty(contentPath)){
    //                    if(src.startsWith(contentPath)){
    //                        src=src.substring(contentPath.length());
    //                    }
    //                }
    //                
    //                srcRealPath = RequestResponseContext.getRequest().getSession()
    //                        .getServletContext().getRealPath(src);
                    
                }
                
                File imageFile = new File(srcRealPath);
                String imageFielShortName = imageFile.getName();
                String fileTypeName = WordImageConvertor.getFileSuffix(srcRealPath);
    
                String docFileName = "image" + UUID.randomUUID().toString() + "."+ fileTypeName;
                String srcLocationShortName = docSrcParent + "/" + docFileName;
    
                String styleAttr = item.attr("style"); // 样式
                //高度
                String imagHeightStr=item.attr("height");
                if(StringUtils.isEmpty(imagHeightStr)){
                    imagHeightStr = getStyleAttrValue(styleAttr, "height");
                }
                //宽度
                String imagWidthStr=item.attr("width");;
                if(StringUtils.isEmpty(imagWidthStr)){
                    imagWidthStr = getStyleAttrValue(styleAttr, "width");
                }
        
                imagHeightStr = imagHeightStr.replace("px", "");
                imagWidthStr = imagWidthStr.replace("px", "");
                if(StringUtils.isEmpty(imagHeightStr)){
                    //去得到默认的文件高度
                    imagHeightStr="0";
                }
                if(StringUtils.isEmpty(imagWidthStr)){
                    imagWidthStr="0";
                }
                int imageHeight = Integer.parseInt(imagHeightStr);
                int imageWidth = Integer.parseInt(imagWidthStr);
                
                // 得到文件的word mht的body块
                String handledDocBodyBlock = WordImageConvertor.toDocBodyBlock(srcRealPath,
                        imageFielShortName, imageHeight, imageWidth,styleAttr,
                        srcLocationShortName, shapeidPrex, spidPrex, typeid);
    
                //这里的顺序有点问题:应该是替换item,而不是整个后面追加
                //doc.rreplaceAll(item.toString(), handledDocBodyBlock);
                item.after(handledDocBodyBlock);
    //            item.parent().append(handledDocBodyBlock);
                item.remove();
                // 去替换原生的html中的imag
    
                String base64Content = WordImageConvertor.imageToBase64(srcRealPath);
                String contextLoacation = docSrcLocationPrex + "/" + docSrcParent + "/" + docFileName;
    
                String docBase64BlockResult = WordImageConvertor.generateImageBase64Block(nextPartId, contextLoacation,
                                fileTypeName, base64Content);
                docBase64BlockResults.add(docBase64BlockResult);
    
                String imagXMLHref = "<o:File HRef=3D"" + docFileName + ""/>";
                xmlImgRefs.add(imagXMLHref);
    
            }
    
        }
    
        private String getStyleAttrValue(String style, String attributeKey) {
            if (StringUtils.isEmpty(style)) {
                return "";
            }
    
            // 以";"分割
            String[] styleAttrValues = style.split(";");
            for (String item : styleAttrValues) {
                // 在以 ":"分割
                String[] keyValuePairs = item.split(":");
                if (attributeKey.equals(keyValuePairs[0])) {
                    return keyValuePairs[1];
                }
            }
    
            return "";
        }
        
        private String wrappHtml(String html){
            // 因为传递过来都是不完整的doc
            StringBuilder sb = new StringBuilder();
            sb.append("<html>");
            sb.append("<body>");
            sb.append(html);
    
            sb.append("</body>");
            sb.append("</html>");
            return sb.toString();
        }    
        
        public String getData(List<String> list){
            String data = "";
            if (list != null && list.size() > 0) {
                for (String string : list) {
                    data += string + "
    ";
                }
            }
            return data;
        }
    }

    WordHtmlGeneratorHelper.java

    import java.lang.reflect.Field;
    import java.util.ArrayList;
    import java.util.Collection;
    import java.util.Date;
    import java.util.List;
    import java.util.Map;
    
    import org.apache.commons.beanutils.PropertyUtils;
    import org.springframework.util.ReflectionUtils;
    import org.apache.commons.lang3.StringUtils;
    import org.springframework.util.ReflectionUtils.FieldCallback;
    
    
    
    /**
     * Text helpers for exporting content as a Word "single file web page" (MHT) document.
     *
     * <p>The central operation is {@link #string2Ascii(String)}, which rewrites characters in the
     * range U+4E00 (inclusive) to U+9EAF (exclusive) as decimal numeric character references. The
     * other methods apply it to bean fields, lists and template data maps.
     *
     * <p>The field traversal uses plain {@code java.lang.reflect}; this class no longer needs the
     * Spring, commons-lang3 or commons-beanutils imports of the file.
     */
    public class WordHtmlGeneratorHelper {

        /** First character (U+4E00, decimal 19968) that is rewritten. */
        private static final int ENCODED_RANGE_START = 19968;

        /** Exclusive end (U+9EAF, decimal 40623) of the rewritten range. */
        private static final int ENCODED_RANGE_END = 40623;

        /**
         * Rewrites every character in the encoded range as {@code &#NNNNN;} (decimal code).
         * All other characters are copied unchanged.
         *
         * <p>NOTE(review): characters outside the range (full-width punctuation, for example)
         * are left as they are; confirm that this is sufficient for the target template.
         *
         * @param source text to convert; may be {@code null}
         * @return the converted text; {@code null} for {@code null} input and an empty string for
         *         empty input
         */
        public static String string2Ascii(String source) {
            // The previous check used == against "", which depends on string interning.
            if (source == null || source.isEmpty()) {
                return source;
            }
            StringBuilder sb = new StringBuilder(source.length() + 16);
            for (int i = 0; i < source.length(); i++) {
                char item = source.charAt(i);
                if (item >= ENCODED_RANGE_START && item < ENCODED_RANGE_END) {
                    sb.append("&#").append((int) item).append(';');
                } else {
                    sb.append(item);
                }
            }
            return sb.toString();
        }

        /**
         * Applies {@link #string2Ascii(String)} in place to every non-empty {@code String} field of
         * the object, including fields declared by its superclasses.
         *
         * <p>{@code static final} fields are skipped: they cannot be written reflectively and used
         * to abort the whole call.
         *
         * @param toHandleObject the object to modify; may be {@code null}
         * @param <T>            type of the object
         * @return the same instance that was passed in
         * @throws IllegalStateException if a field cannot be accessed
         */
        public static <T extends Object> T handleObject2Ascii(final T toHandleObject) {
            if (toHandleObject == null) {
                return null;
            }
            Class<?> type = toHandleObject.getClass();
            while (type != null && type != Object.class) {
                for (Field f : type.getDeclaredFields()) {
                    if (!f.getType().equals(String.class)) {
                        continue;
                    }
                    int modifiers = f.getModifiers();
                    if (java.lang.reflect.Modifier.isStatic(modifiers)
                            && java.lang.reflect.Modifier.isFinal(modifiers)) {
                        continue;
                    }
                    try {
                        f.setAccessible(true);
                        String oldValue = (String) f.get(toHandleObject);
                        if (oldValue != null && !oldValue.isEmpty()) {
                            f.set(toHandleObject, string2Ascii(oldValue));
                        }
                    } catch (IllegalAccessException e) {
                        throw new IllegalStateException(
                                "Not allowed to access field '" + f.getName() + "': " + e, e);
                    }
                }
                type = type.getSuperclass();
            }
            return toHandleObject;
        }

        /**
         * Applies {@link #handleObject2Ascii(Object)} to every element of the list.
         *
         * @param toHandleObjects objects to modify; may be {@code null}
         * @param <T>             element type
         * @return the same list that was passed in
         */
        public static <T extends Object> List<T> handleObjectList2Ascii(final List<T> toHandleObjects) {
            if (toHandleObjects == null) {
                return null;
            }
            for (T t : toHandleObjects) {
                handleObject2Ascii(t);
            }
            return toHandleObjects;
        }

        /**
         * Converts all values of a template data map in place.
         *
         * <ul>
         *   <li>{@code String} values are replaced by their converted form;</li>
         *   <li>other simple values (numbers, dates, enums, booleans, ...) are left alone;</li>
         *   <li>for collections, each element is passed to {@link #handleObject2Ascii(Object)};</li>
         *   <li>any other object is passed to {@link #handleObject2Ascii(Object)};</li>
         *   <li>{@code null} values are skipped.</li>
         * </ul>
         *
         * @param dataMap the map to modify; may be {@code null}
         */
        public static void handleAllObject(Map<String, Object> dataMap) {
            if (dataMap == null) {
                return;
            }
            for (Map.Entry<String, Object> entry : dataMap.entrySet()) {
                Object item = entry.getValue();
                if (item == null) {
                    continue;
                }
                if (isPrimitiveType(item.getClass())) {
                    if (item.getClass().equals(String.class)) {
                        entry.setValue(string2Ascii((String) item));
                    }
                } else if (isCollection(item.getClass())) {
                    for (Object itemobject : (Collection<?>) item) {
                        handleObject2Ascii(itemobject);
                    }
                } else {
                    handleObject2Ascii(item);
                }
            }
        }

        /**
         * Concatenates the entries of {@code list}. When {@code join} is not empty it is appended
         * after every entry, including the last one.
         *
         * @param list entries to concatenate; may be {@code null}
         * @param join text appended after each entry; may be {@code null} or empty
         * @return the concatenated text, or an empty string for a {@code null} list
         */
        public static String joinList(List<String> list, String join) {
            StringBuilder sb = new StringBuilder();
            if (list == null) {
                return sb.toString();
            }
            boolean hasJoin = join != null && !join.isEmpty();
            for (String t : list) {
                sb.append(t);
                if (hasJoin) {
                    sb.append(join);
                }
            }
            return sb.toString();
        }

        /** Tells whether values of this type are simple values that need no field traversal. */
        private static boolean isPrimitiveType(Class<?> clazz) {
            return clazz.isEnum()
                    || CharSequence.class.isAssignableFrom(clazz)
                    || Number.class.isAssignableFrom(clazz)
                    || Date.class.isAssignableFrom(clazz)
                    || Boolean.class == clazz
                    || Character.class == clazz;
        }

        /** Tells whether the type is a {@link Collection}. */
        private static boolean isCollection(Class<?> clazz) {
            return Collection.class.isAssignableFrom(clazz);
        }
    }

    WordImageConvertor.java

    import java.awt.image.BufferedImage;
    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.math.BigDecimal;
    import java.util.UUID;
    
    import javax.imageio.ImageIO;
    
    import org.apache.commons.codec.binary.Base64;
    
    import sun.misc.BASE64Encoder;
    
    
    
    /**
     * Converts image files into the pieces a Word "single file web page" (MHT) document needs:
     * the base64 payload, the MIME part wrapping it, and the VML block that references it from
     * the HTML body.
     *
     * <p>Base64 encoding uses {@code java.util.Base64}; this class no longer needs the
     * commons-codec {@code Base64} or {@code sun.misc.BASE64Encoder} imports of the file.
     * The line-feed literals in this class were reconstructed as {@code "\n"}.
     */
    public class WordImageConvertor {

        /** Conversion factor from CSS pixels to points (12pt per 16px). */
        private static final BigDecimal POINTS_PER_PIXEL = new BigDecimal("0.75");

        /**
         * Reads a file and returns its content as chunked base64 text: lines of at most 76
         * characters, each terminated by CRLF (including the last one).
         *
         * @param imageSrc path of the file to encode
         * @return the chunked base64 text, or an empty string for an empty file
         * @throws FileNotFoundException if the file does not exist
         * @throws IOException           if the file cannot be read
         */
        public static String imageToBase64(String imageSrc) throws IOException {
            File file = new File(imageSrc);
            if (!file.exists()) {
                // Message text (Chinese for "file does not exist!") kept as unicode escapes so the
                // source stays ASCII-only.
                throw new FileNotFoundException("\u6587\u4ef6\u4e0d\u5b58\u5728!");
            }
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            // try-with-resources closes the stream even when a read fails.
            try (FileInputStream input = new FileInputStream(file)) {
                byte[] temp = new byte[1024];
                for (int len = input.read(temp); len != -1; len = input.read(temp)) {
                    out.write(temp, 0, len);
                }
            }
            byte[] raw = out.toByteArray();
            if (raw.length == 0) {
                return "";
            }
            // The MIME encoder separates lines with CRLF but does not terminate the last line;
            // the trailing CRLF keeps the output identical to the previous chunked encoding.
            return java.util.Base64.getMimeEncoder().encodeToString(raw) + "\r\n";
        }

        /**
         * Builds the conditional-comment VML block ({@code <v:shape>} with a nested
         * {@code <v:imagedata>}) that replaces an {@code <img>} element in the document body.
         * Attribute assignments are already written with {@code =3D}.
         *
         * <p>The shape id and spid only have to be unique, so each is the given prefix followed by
         * a random UUID.
         *
         * @param imageFilePath        local path of the image; only read when a dimension is 0
         * @param imageFielShortName   file name, used as {@code alt}; the part before its first dot
         *                             is used as {@code o:title}
         * @param imageHeight          height in pixels, or 0 to use the image's own height
         * @param imageWidth           width in pixels, or 0 to use the image's own width
         * @param imageStyle           extra inline style appended after the generated one; may be
         *                             {@code null}
         * @param srcLocationShortName relative location of the image part, e.g.
         *                             {@code paper.files/image1.jpg}
         * @param shapeidPrex          prefix of the generated shape id
         * @param spidPrex             prefix of the generated spid
         * @param typeid               value of the {@code type} attribute
         * @return the VML block
         */
        public static String toDocBodyBlock(
                String imageFilePath,
                String imageFielShortName,
                int imageHeight,
                int imageWidth,
                String imageStyle,
                String srcLocationShortName,
                String shapeidPrex, String spidPrex, String typeid) {
            String shapeid = shapeidPrex + UUID.randomUUID().toString();
            String spid = spidPrex + UUID.randomUUID().toString();

            // Text before the first dot; indexOf avoids the empty array split() yields for ".".
            String title = imageFielShortName;
            int firstDot = imageFielShortName.indexOf('.');
            if (firstDot >= 0) {
                title = imageFielShortName.substring(0, firstDot);
            }
            String extraStyle = imageStyle == null ? "" : imageStyle;

            StringBuilder sb1 = new StringBuilder();
            sb1.append(" <!--[if gte vml 1]>");
            sb1.append("<v:shape id=3D\"" + shapeid + "\"");
            sb1.append("\n");
            sb1.append(" o:spid=3D\"" + spid + "\"");
            sb1.append(" type=3D\"" + typeid + "\" alt=3D\"" + imageFielShortName + "\"");
            sb1.append("\n");
            sb1.append(" style=3D' "
                    + generateImageBodyBlockStyleAttr(imageFilePath, imageHeight, imageWidth)
                    + extraStyle + "'");
            sb1.append(">");
            sb1.append("\n");
            sb1.append(" <v:imagedata src=3D\"" + srcLocationShortName + "\"");
            sb1.append("\n");
            sb1.append(" o:title=3D\"" + title + "\"");
            sb1.append("/>");
            sb1.append("</v:shape>");
            sb1.append("<![endif]-->");

            // A non-VML <img> fallback would only matter for browsers; Word does not need it.
            return sb1.toString();
        }

        /**
         * Builds the MIME part that carries one image: two blank separators, the boundary line,
         * the {@code Content-Location}, {@code Content-Transfer-Encoding} and {@code Content-Type}
         * headers, an empty line and the base64 payload.
         *
         * @param nextPartId       boundary id; must be the one used by the template
         * @param contextLoacation value of the {@code Content-Location} header
         * @param fileTypeName     file extension used to choose the content type
         * @param base64Content    base64 payload
         * @return the MIME part text
         */
        public static String generateImageBase64Block(String nextPartId, String contextLoacation,
                                        String fileTypeName, String base64Content) {
            StringBuilder sb = new StringBuilder();
            sb.append("\n");
            sb.append("\n");
            sb.append("------=_NextPart_" + nextPartId);
            sb.append("\n");
            sb.append("Content-Location: " + contextLoacation);
            sb.append("\n");
            sb.append("Content-Transfer-Encoding: base64");
            sb.append("\n");
            sb.append("Content-Type: " + getImageContentType(fileTypeName));
            sb.append("\n");
            sb.append("\n");
            sb.append(base64Content);
            return sb.toString();
        }

        /**
         * Returns the text after the last dot of the file name.
         *
         * @param srcRealPath path or file name
         * @return the extension without the dot, or an empty string when the last path segment
         *         contains no dot
         */
        public static String getFileSuffix(String srcRealPath) {
            int lastDot = srcRealPath.lastIndexOf('.');
            int lastSeparator = Math.max(srcRealPath.lastIndexOf('/'), srcRealPath.lastIndexOf('\\'));
            // A dot inside a directory name is not an extension.
            if (lastDot < 0 || lastDot < lastSeparator) {
                return "";
            }
            return srcRealPath.substring(lastDot + 1);
        }

        /**
         * Builds the size and visibility part of the shape's inline style, with sizes in points.
         * The image file is read only when a dimension is 0; if it cannot be read, the missing
         * dimension stays 0.
         */
        private static String generateImageBodyBlockStyleAttr(String imageFilePath, int height, int width) {
            if (height == 0 || width == 0) {
                try {
                    // ImageIO.read(File) manages its own stream; it returns null when no reader
                    // understands the file.
                    BufferedImage sourceImg = ImageIO.read(new File(imageFilePath));
                    if (sourceImg != null) {
                        if (height == 0) {
                            height = sourceImg.getHeight();
                        }
                        if (width == 0) {
                            width = sourceImg.getWidth();
                        }
                    }
                } catch (IOException e) {
                    // Best effort: the size is only a rendering hint.
                    System.err.println("WordImageConvertor: cannot read image size of "
                            + imageFilePath + ": " + e);
                }
            }

            StringBuilder sb = new StringBuilder();
            sb.append("height:" + toPoints(height) + "pt;");
            sb.append("width:" + toPoints(width) + "pt;");
            sb.append("visibility:visible;");
            sb.append("mso-wrap-style:square; ");
            return sb.toString();
        }

        /** Converts pixels to points with two decimals, without integer truncation. */
        private static BigDecimal toPoints(int pixels) {
            return BigDecimal.valueOf(pixels)
                    .multiply(POINTS_PER_PIXEL)
                    .setScale(2, java.math.RoundingMode.HALF_UP);
        }

        /**
         * Maps a file extension (case-insensitive) to a content type. Unknown or missing
         * extensions map to {@code image/jpeg}.
         */
        private static String getImageContentType(String fileTypeName) {
            String type = fileTypeName == null
                    ? ""
                    : fileTypeName.trim().toLowerCase(java.util.Locale.ROOT);
            if (type.equals("tif") || type.equals("tiff")) {
                return "image/tiff";
            }
            if (type.equals("fax")) {
                return "image/fax";
            }
            if (type.equals("gif")) {
                return "image/gif";
            }
            if (type.equals("ico")) {
                return "image/x-icon";
            }
            if (type.equals("net")) {
                return "image/pnetvue";
            }
            if (type.equals("png")) {
                return "image/png";
            }
            if (type.equals("bmp")) {
                return "image/bmp";
            }
            if (type.equals("rp")) {
                return "image/vnd.rn-realpix";
            }
            // jfif, jpe, jpeg, jpg and everything else.
            return "image/jpeg";
        }
    }

    Test.java

       // content: the rich-text HTML to process, for example:
       //   图片上传对对对 <img src="/../upload/image/20170615/1497500926071064595.jpg"
       //       title="1497500926071064595.jpg" _src="/../upload/image/20170615/1497500926071064595.jpg"
       //       alt="ie知识点请求2.jpg" width="178" height="83" style="width: 178px; height: 83px;" />不对fdasdfsadfsadffD
    
       // Parse the rich text and convert its <img> elements; image paths are resolved
       // relative to appRoot.
       RichHtmlHandler handler = new RichHtmlHandler(content, appRoot + File.separator);
       // The processed body goes into the bean that the template renders.
       bo.setQuestionContent(handler.getHandledDocBodyBlock());
       // Accumulate the per-image MIME parts and file-list entries (one entry per line).
       handledBase64Block += handler.getData(handler.getDocBase64BlockResults());
       xmlimaHref += handler.getData(handler.getXmlImgRefs());
    
            // Expose both strings to the template under the names of its placeholders.
            dataMap.put("imagesBase64String", handledBase64Block);
                dataMap.put("imagesXmlHrefString", xmlimaHref);

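    paper.ftl 里面要有两个占位符:${imagesXmlHrefString} 放在 filelist.xml 部分的 <o:File> 列表中(见下面的示例);${imagesBase64String} 放在文件末尾的结束分隔行(------=_NextPart_...--)之前,因为生成的每个图片块都以自己的分隔行开头。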

    MIME-Version: 1.0
    Content-Type: multipart/related; boundary="----=_NextPart_01D2EB53.503F62F0"
    
    此文档为“单个文件网页”,也称为“Web 档案”文件。如果您看到此消息,但是您的浏览器或编辑器不支持“Web 档案”文件。请下载支持“Web 档案”的浏览器,如 Windows® Internet Explorer®。
    
    ------=_NextPart_01D2EB53.503F62F0
    Content-Location: file:///C:/D1324D12/paper.htm
    Content-Transfer-Encoding: quoted-printable
    Content-Type: text/html; charset="utf-8"
    
    <html xmlns:v=3D"urn:schemas-microsoft-com:vml"
    xmlns:o=3D"urn:schemas-microsoft-com:office:office"
    xmlns:w=3D"urn:schemas-microsoft-com:office:word"
    xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml"
    xmlns=3D"http://www.w3.org/TR/REC-html40">
    
    <head>
    <meta http-equiv=3DContent-Type content=3D"text/html; charset=3Dutf-8">
    ----
    ----
    ----
    -----
    -----
    ------
    -------
    --------省略。。。。。
    </body>
    
    </html>
    
    ------=_NextPart_01D2EB53.503F62F0
    Content-Location: file:///C:/D1324D12/paper.files/filelist.xml
    Content-Transfer-Encoding: quoted-printable
    Content-Type: text/xml; charset="utf-8"
    
    <xml xmlns:o=3D"urn:schemas-microsoft-com:office:office">
     <o:MainFile HRef=3D"../paper.htm"/>
     <o:File HRef=3D"themedata.thmx"/>
     <o:File HRef=3D"colorschememapping.xml"/>
     ${imagesXmlHrefString}
     <o:File HRef=3D"header.htm"/>
     <o:File HRef=3D"filelist.xml"/>
    </xml>
    ------=_NextPart_01D2EB53.503F62F0--

    网页效果:

    下载效果:

    完整代码:  https://github.com/shandianlala/sdll-blog

     欢迎加入“Java Communication” 交流群,群号:622810880

  • 相关阅读:
    java 键盘监听事件
    DOM扩展
    DOM
    CSS hack
    客户端检测
    BOM
    函数表达式
    面向对象的程序设计
    引用类型(下)
    引用类型(上)
  • 原文地址:https://www.cnblogs.com/sdll/p/7619786.html
Copyright © 2011-2022 走看看