概述
在开发过程中,word转pdf的方式有很多种:有使用jar包的方式,也有安装openoffice的方式。但是有的jar包需要license认证,否则生成的pdf会带水印。综合比较几种方法后,我采用了libreoffice的方式。
本项目为springboot项目
依赖包
<!-- JODConverter Spring Boot starter: provides the DocumentConverter bean -->
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-spring-boot-starter</artifactId>
    <version>4.1.0</version>
</dependency>
<!-- LibreOffice UNO bridge libraries -->
<dependency>
    <groupId>org.libreoffice</groupId>
    <artifactId>juh</artifactId>
    <version>5.4.2</version>
</dependency>
<dependency>
    <groupId>org.libreoffice</groupId>
    <artifactId>jurt</artifactId>
    <version>5.4.2</version>
</dependency>
<dependency>
    <groupId>org.libreoffice</groupId>
    <artifactId>ridl</artifactId>
    <version>5.4.2</version>
</dependency>
<dependency>
    <groupId>org.libreoffice</groupId>
    <artifactId>unoil</artifactId>
    <version>5.4.2</version>
</dependency>
application.yml配置
# JODConverter settings; every key below must be nested under "jodconverter".
jodconverter:
  enabled: true
  # Linux example (switch to this line and remove the Windows path when deploying on Linux):
  #office-home: /opt/libreoffice7.0
  port-numbers: 8200
  max-tasks-per-process: 100
  # Left empty so the library default is used.
  working-dir:
  # Windows install location of LibreOffice.
  office-home: C:\Program Files\LibreOffice
java代码
在类中引入DocumentConverter
@Resource private DocumentConverter documentConverter; //在使用处使用 documentConverter.convert(new File(sourcePath)).to(new File(targetPath)).execute();
docker制作libreoffice镜像
在linux中配置libreoffice。因为这边采用了docker,所以简单介绍下如何在docker中制作libreoffice镜像,并发布项目。
首先准备Dockerfile文件(注:该文件没有扩展名),该镜像基于centos7版本镜像:
# Base image: CentOS 7
FROM centos:7

# Chinese locale and Asia/Shanghai time zone
ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN:zh LC_ALL=zh_CN.UTF-8
RUN yum update -y && \
    yum reinstall -y glibc-common && \
    yum install -y telnet net-tools && \
    yum clean all && \
    rm -rf /tmp/* && \
    rm -rf /var/cache/yum/* && \
    localedef -c -f UTF-8 -i zh_CN zh_CN.UTF-8 && \
    ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

# Windows font pack (ADD auto-extracts local tar archives)
ADD chinese.tar.gz /usr/share/fonts/

# Unpack the downloaded LibreOffice RPM bundle
ADD LibreOffice_7.0.1_Linux_x86-64_rpm.tar.gz /home/

# Install LibreOffice and its dependency (ibus), register the Chinese fonts,
# create the JDK directory, then clean the yum cache to keep the image small.
RUN cd /home/LibreOffice_7.0.1.2_Linux_x86-64_rpm/RPMS/ && \
    yum localinstall *.rpm -y && \
    yum install ibus -y && \
    cd /usr/share/fonts/ && \
    chmod -R 755 /usr/share/fonts && \
    yum install mkfontscale -y && \
    mkfontscale && \
    yum install fontconfig -y && \
    mkfontdir && \
    fc-cache -fv && \
    mkdir /usr/local/java/ && \
    yum clean all

# Java runtime
ADD jdk-8u121-linux-x64.tar.gz /usr/local/java/
RUN ln -s /usr/local/java/jdk1.8.0_121 /usr/local/java/jdk

# Java environment variables
ENV JAVA_HOME /usr/local/java/jdk
ENV JRE_HOME ${JAVA_HOME}/jre
ENV CLASSPATH .:${JAVA_HOME}/lib:${JRE_HOME}/lib
ENV PATH ${JAVA_HOME}/bin:$PATH

CMD ["bash"]
此外,chinese.tar.gz、jdk-8u121-linux-x64.tar.gz、LibreOffice_7.0.1_Linux_x86-64_rpm.tar.gz这些包需要和Dockerfile文件放在同一目录下,然后执行下文的docker build命令。
相关文件下载方式:
链接:https://pan.baidu.com/s/18XjsvvrJfsle9DEcDJKQUQ
提取码:9hke
docker build -t libreoffice:v1 .
以上为构建命令,注意命令末尾有一个点(表示以当前目录作为构建上下文)。
另外部署你所要用到libreoffice的项目,相应的Dockerfile文件:
# Application image built on top of the LibreOffice base image
FROM libreoffice:v1
VOLUME /tmp

# Copy the application jar to /app.jar, the path used by the steps below
ADD xxx.jar /app.jar
RUN bash -c 'touch /app.jar'

# Container time zone
ENV TZ=Asia/Shanghai
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

EXPOSE 9501
ENTRYPOINT ["java","-Xmx640M","-Xms640M","-Xmn240M","-XX:MaxMetaspaceSize=192M","-XX:MetaspaceSize=192M","-Djava.security.egd=file:/dev/./urandom","-jar","/app.jar"]
并执行
docker build -t xxx .
然后启动容器即可使用。
加盖印章
概述
加盖印章是在文档转为pdf之后进行的,采用的技术是itext。相较于之前划定固定区域来加盖印章的方式,这种方式的优点是可以通过关键字来定位盖章的区域,并根据x轴和y轴的偏移量来调整相对位置。
之前划定区域的方式也可以,但是如果在盖章位置的上方新增了内容,导致下方的文档内容向下移动,章就会盖偏,达不到想要的效果。
相关依赖
<!--
  iText Asian font resources.
  NOTE(review): the Java code in this article imports com.itextpdf.text.*, which presumably
  comes from the com.itextpdf:itextpdf core artifact; confirm that it is also declared.
-->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-asian</artifactId>
    <version>5.2.0</version>
</dependency>
GetKeyWordPosition.java
import com.itextpdf.awt.geom.Rectangle2D; import com.itextpdf.text.pdf.PdfDictionary; import com.itextpdf.text.pdf.PdfName; import com.itextpdf.text.pdf.PdfReader; import com.itextpdf.text.pdf.parser.*; public class GetKeyWordPosition { public static void main(String[] args) throws IOException { //1.给定文件 File pdfFile = new File("C:\Users\xxx\Desktop\结题报告.pdf"); //2.定义一个byte数组,长度为文件的长度 byte[] pdfData = new byte[(int) pdfFile.length()]; //3.IO流读取文件内容到byte数组 FileInputStream inputStream = null; try { inputStream = new FileInputStream(pdfFile); inputStream.read(pdfData); } catch (IOException e) { throw e; } finally { if (inputStream != null) { try { inputStream.close(); } catch (IOException e) { } } } //4.指定关键字 String keyword = "公章"; //5.调用方法,给定关键字和文件 List<float[]> positions = findKeywordPostions(pdfData, keyword); //6.返回值类型是 List<float[]> 每个list元素代表一个匹配的位置,分别为 float[0]所在页码 float[1]所在x轴 float[2]所在y轴 System.out.println("total:" + positions.size()); if (positions != null && positions.size() > 0) { for (float[] position : positions) { System.out.print("pageNum: " + (int) position[0]); System.out.print(" x: " + position[1]); System.out.println(" y: " + position[2]); } } } /** * findKeywordPostions * @param pdfData 通过IO流 PDF文件转化的byte数组 * @param keyword 关键字 * @return List<float [ ]> : float[0]:pageNum float[1]:x float[2]:y * @throws IOException */ public static List<float[]> findKeywordPostions(byte[] pdfData, String keyword) throws IOException { List<float[]> result = new ArrayList<>(); List<PdfPageContentPositions> pdfPageContentPositions = getPdfContentPostionsList(pdfData); for (PdfPageContentPositions pdfPageContentPosition : pdfPageContentPositions) { List<float[]> charPositions = findPositions(keyword, pdfPageContentPosition); if (charPositions == null || charPositions.size() < 1) { continue; } result.addAll(charPositions); } return result; } private static List<PdfPageContentPositions> getPdfContentPostionsList(byte[] pdfData) throws IOException { PdfReader reader = 
new PdfReader(pdfData); List<PdfPageContentPositions> result = new ArrayList<>(); int pages = reader.getNumberOfPages(); for (int pageNum = 1; pageNum <= pages; pageNum++) { float width = reader.getPageSize(pageNum).getWidth(); float height = reader.getPageSize(pageNum).getHeight(); PdfRenderListener pdfRenderListener = new PdfRenderListener(pageNum, width, height); //解析pdf,定位位置 PdfContentStreamProcessor processor = new PdfContentStreamProcessor(pdfRenderListener); PdfDictionary pageDic = reader.getPageN(pageNum); PdfDictionary resourcesDic = pageDic.getAsDict(PdfName.RESOURCES); try { processor.processContent(ContentByteUtils.getContentBytesForPage(reader, pageNum), resourcesDic); } catch (IOException e) { reader.close(); throw e; } String content = pdfRenderListener.getContent(); List<CharPosition> charPositions = pdfRenderListener.getcharPositions(); List<float[]> positionsList = new ArrayList<>(); for (CharPosition charPosition : charPositions) { float[] positions = new float[]{charPosition.getPageNum(), charPosition.getX(), charPosition.getY()}; positionsList.add(positions); } PdfPageContentPositions pdfPageContentPositions = new PdfPageContentPositions(); pdfPageContentPositions.setContent(content); pdfPageContentPositions.setPostions(positionsList); result.add(pdfPageContentPositions); } reader.close(); return result; } private static List<float[]> findPositions(String keyword, PdfPageContentPositions pdfPageContentPositions) { List<float[]> result = new ArrayList<>(); String content = pdfPageContentPositions.getContent(); List<float[]> charPositions = pdfPageContentPositions.getPositions(); for (int pos = 0; pos < content.length(); ) { int positionIndex = content.indexOf(keyword, pos); if (positionIndex == -1) { break; } float[] postions = charPositions.get(positionIndex); result.add(postions); pos = positionIndex + 1; } return result; } private static class PdfPageContentPositions { private String content; private List<float[]> positions; public String 
getContent() { return content; } public void setContent(String content) { this.content = content; } public List<float[]> getPositions() { return positions; } public void setPostions(List<float[]> positions) { this.positions = positions; } } private static class PdfRenderListener implements RenderListener { private int pageNum; private float pageWidth; private float pageHeight; private StringBuilder contentBuilder = new StringBuilder(); private List<CharPosition> charPositions = new ArrayList<>(); public PdfRenderListener(int pageNum, float pageWidth, float pageHeight) { this.pageNum = pageNum; this.pageWidth = pageWidth; this.pageHeight = pageHeight; } public void beginTextBlock() { } public void renderText(TextRenderInfo renderInfo) { List<TextRenderInfo> characterRenderInfos = renderInfo.getCharacterRenderInfos(); for (TextRenderInfo textRenderInfo : characterRenderInfos) { String word = textRenderInfo.getText(); if (word.length() > 1) { word = word.substring(word.length() - 1, word.length()); } Rectangle2D.Float rectangle = textRenderInfo.getAscentLine().getBoundingRectange(); float x = (float)rectangle.getX(); float y = (float)rectangle.getY(); // float x = (float)rectangle.getCenterX(); // float y = (float)rectangle.getCenterY(); // double x = rectangle.getMinX(); // double y = rectangle.getMaxY(); //这两个是关键字在所在页面的XY轴的百分比 float xPercent = Math.round(x / pageWidth * 10000) / 10000f; float yPercent = Math.round((1 - y / pageHeight) * 10000) / 10000f; // CharPosition charPosition = new CharPosition(pageNum, xPercent, yPercent); CharPosition charPosition = new CharPosition(pageNum, (float)x, (float)y); charPositions.add(charPosition); contentBuilder.append(word); } } public void endTextBlock() { } public void renderImage(ImageRenderInfo renderInfo) { } public String getContent() { return contentBuilder.toString(); } public List<CharPosition> getcharPositions() { return charPositions; } } private static class CharPosition { private int pageNum = 0; private float x = 
0; private float y = 0; public CharPosition(int pageNum, float x, float y) { this.pageNum = pageNum; this.x = x; this.y = y; } public int getPageNum() { return pageNum; } public float getX() { return x; } public float getY() { return y; } @Override public String toString() { return "[pageNum=" + this.pageNum + ",x=" + this.x + ",y=" + this.y + "]"; } } }
StampUtil.java
import com.itextpdf.text.DocumentException; import com.itextpdf.text.Image; import com.itextpdf.text.pdf.*; public class StampUtil { public static void main(String[] args) throws Exception { //pt3.pdf你在resource目录下找得到,放到绝对路径下就好了 findKeyWordAndAddImage("C:\Users\wanggang\Desktop\2020年 湖南省卫健委科研项目合同书.pdf", "C:\Users\wanggang\Desktop\2020年 湖南省卫健委科研项目合同书seal.pdf", "湖南省卫生健康委(甲方)科技主管部门:(公章)", "C:\Users\wanggang\Desktop\timg.png", 0, -130); } /** * 寻找指定的关键字后给其所在位置盖章 * @param source 源pdf文件 * @param target 目标pdf文件 * @param keyword 关键字 * @param image 印章路径 * @param xOffset x轴偏移量(没有则指定为0) * @param yOffset y轴偏移量(没有则指定为0) * @return 返回结果 * @throws IOException * @throws DocumentException */ public static boolean findKeyWordAndAddImage(String source, String target, String keyword, String image, float xOffset, float yOffset) throws IOException, DocumentException { boolean result = false; File pdfFile = new File(source); byte[] pdfData = new byte[(int) pdfFile.length()]; FileInputStream fis = null; try { fis = new FileInputStream(pdfFile); fis.read(pdfData); } catch (IOException e) { e.printStackTrace(); System.out.println("文件不存在"); return result; }finally { fis.close(); } System.out.println("keyword:"+keyword); //查到关键字返回位置 List<float[]> positions = findKeywordPostions(pdfData, keyword); if(positions.size() == 0){ System.out.println("关键字不存在"); return result; } //添加水印 //会查询到多个关键字定位,固定取最后一个 result = addImage(source, target, image, positions.get(positions.size()-1), xOffset, yOffset); return true; } //添加水印(签章) private static boolean addImage(String source, String target, String imagePath, float[] positions, float xOffset, float yOffset) throws IOException, DocumentException { // 读取模板文件 InputStream input = new FileInputStream(new File(source)); PdfReader reader = new PdfReader(input); FileOutputStream fileOutputStream = new FileOutputStream(target); PdfStamper stamper = new PdfStamper(reader, fileOutputStream); // 提取pdf中的表单 AcroFields form = stamper.getAcroFields(); 
form.addSubstitutionFont(BaseFont.createFont("STSong-Light","UniGB-UCS2-H", BaseFont.NOT_EMBEDDED)); // 读图片 Image image = Image.getInstance(imagePath); // 获取操作的页面 PdfContentByte under = stamper.getOverContent((int)positions[0]); //设置图片的定位 image.setAbsolutePosition(positions[1]+xOffset, positions[2]+yOffset); //image.scalePercent(75); //缩放图片为指定百分比 // 设置透明度为0.8 PdfGState gs = new PdfGState(); gs.setFillOpacity(0.8f); under.setGState(gs); // 添加图片 under.addImage(image); stamper.close(); fileOutputStream.close(); reader.close(); input.close(); return true; } }