zoukankan      html  css  js  c++  java
  • Java定位PDF中关键字的坐标

    本文演示如何使用 itextpdf 操作 PDF 文件,并定位关键字在 PDF 文件中的坐标。

    测试结果:

    测试的PDF文件如下:

    junit测试输出坐标:

    maven配置中引入itextpdf:

    <!-- 引入pdf -->
        <dependency>
          <groupId>com.itextpdf</groupId>
          <artifactId>itextpdf</artifactId>
          <version>5.5.13</version>
        </dependency>

    定位工具类PdfHelper.java:

    package com.alphajuns.util;
    
    import com.itextpdf.text.pdf.PdfReader;
    import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
    
    import java.io.IOException;
    
    /**
     * @ClassName PdfHelper
     * @Description PDF helper: locates the coordinates of a keyword inside a PDF file.
     * @Author AlphaJunS
     * @Date 2020/3/7 17:40
     * @Version 1.0
     */
    public class PdfHelper {

        /**
         * @Author AlphaJunS
         * @Date 18:24 2020/3/7
         * @Description Entry point for external callers: opens the PDF at the given path and
         *              searches it for the keyword. The reader is always closed before returning.
         * @param filepath path of the PDF file to open
         * @param keyWords keyword to search for
         * @return float[] the coordinate array produced by {@link CustomRenderListener}
         *         ({@code [x, y, page]}), or {@code null} when the keyword is not found,
         *         {@code keyWords} is {@code null}, or the file cannot be read
         */
        public static float[] getKeyWordsByPath(String filepath, String keyWords) {
            float[] coordinate = null;
            PdfReader pdfReader = null;
            try {
                pdfReader = new PdfReader(filepath);
                coordinate = getKeyWords(pdfReader, keyWords);
            } catch (IOException e) {
                // Best-effort lookup: report the failure and fall through to return null.
                e.printStackTrace();
            } finally {
                // Release the underlying file handle; the original code leaked it.
                if (pdfReader != null) {
                    pdfReader.close();
                }
            }
            return coordinate;
        }

        /**
         * @Author AlphaJunS
         * @Date 18:26 2020/3/7
         * @Description Scans the pages in order (1-based) and stops at the first page on which
         *              the listener reports a match. Does not close the reader; the caller owns it.
         * @param pdfReader an already opened reader
         * @param keyWords keyword to search for
         * @return float[] the listener's coordinate array, or {@code null} when nothing matched,
         *         {@code keyWords} is {@code null}, or reading a page failed
         */
        private static float[] getKeyWords(PdfReader pdfReader, String keyWords) {
            // A null keyword can never match and would otherwise trigger a
            // NullPointerException inside String.contains in the listener.
            if (keyWords == null) {
                return null;
            }
            float[] coordinate = null;
            try {
                int pageNum = pdfReader.getNumberOfPages();
                PdfReaderContentParser pdfReaderContentParser = new PdfReaderContentParser(pdfReader);
                CustomRenderListener renderListener = new CustomRenderListener();
                renderListener.setKeyWord(keyWords);
                for (int page = 1; page <= pageNum; page++) {
                    // Tell the listener which page it is on so it can record it with the match.
                    renderListener.setPage(page);
                    pdfReaderContentParser.processContent(page, renderListener);
                    coordinate = renderListener.getPcoordinate();
                    if (coordinate != null) {
                        break;
                    }
                }
            } catch (IOException e) {
                // Best-effort lookup: report the failure and return whatever was found so far.
                e.printStackTrace();
            }
            return coordinate;
        }

    }

    渲染监听器类CustomRenderListener.java:

    package com.alphajuns.util;
    
    import com.itextpdf.awt.geom.Rectangle2D.Float;
    import com.itextpdf.text.pdf.parser.ImageRenderInfo;
    import com.itextpdf.text.pdf.parser.RenderListener;
    import com.itextpdf.text.pdf.parser.TextRenderInfo;
    
    /**
     * @Author AlphaJunS
     * @Date 12:53 2020/3/7
     * @Description Render listener that records the position of text chunks containing a
     *              configured keyword while a PDF page is being parsed.
     */
    public class CustomRenderListener implements RenderListener{

        /** Last match as {@code [x, y, page]}; {@code null} until a chunk matches. */
        private float[] pcoordinate = null;

        /** Keyword to look for inside each rendered text chunk. */
        private String keyWord;

        /** Page number stored into the coordinate array when a match is recorded. */
        private int page;

        public int getPage() {
            return page;
        }

        public void setPage(int page) {
            this.page = page;
        }

        /**
         * @return the recorded {@code [x, y, page]} array, or {@code null} if no chunk matched yet
         */
        public float[] getPcoordinate(){
            return pcoordinate;
        }

        public String getKeyWord() {
            return keyWord;
        }

        public void setKeyWord(String keyWord) {
            this.keyWord = keyWord;
        }

        @Override
        public void beginTextBlock() {}

        @Override
        public void endTextBlock() {}

        @Override
        public void renderImage(ImageRenderInfo arg0) {}

        /**
         * Records the baseline bounding-rectangle origin of the chunk when its text contains
         * the keyword. Every matching chunk overwrites the previous result, so the last
         * matching chunk processed wins.
         *
         * @param textRenderInfo the text chunk being rendered
         */
        @Override
        public void renderText(TextRenderInfo textRenderInfo) {
            // Without a keyword nothing can match; this also avoids the
            // NullPointerException that String.contains(null) would throw.
            if (keyWord == null) {
                return;
            }
            String text = textRenderInfo.getText();
            if (null != text && text.contains(keyWord)) {
                // Float here is the imported Rectangle2D.Float, not java.lang.Float;
                // "getBoundingRectange" is the library's own spelling.
                Float boundingRectange = textRenderInfo.getBaseline().getBoundingRectange();
                pcoordinate = new float[3];
                pcoordinate[0] = boundingRectange.x;
                pcoordinate[1] = boundingRectange.y;
                pcoordinate[2] = page;
            }
        }

    }
  • 相关阅读:
    Gitee + PicGo搭建图床 & Typora上传图片到图床
    算法思维 ---- 双指针法
    Floyd's cycle-finding algorithm
    Boyer-Moore Voting Algorithm
    youtube-dl 使用小记
    算法思维 ---- 滑动窗口
    Memo
    英语
    BZOJ 3270
    BZOJ 3196
  • 原文地址:https://www.cnblogs.com/alphajuns/p/12436332.html
Copyright © 2011-2022 走看看