zoukankan      html  css  js  c++  java
  • 解析csv、 pdf文件

    /**
     * 解析csv文件 到一个list中
     * 每个单元个为一个String类型记录,每一行为一个list。
     * 再将所有的行放到一个总list中
     *
     * @return
     * @throws IOException
     */
    public static List<List<String>> importCsv(MultipartFile file) {
        List<List<String>> dataList = new ArrayList<>();
        BufferedReader brReader = null;
        InputStreamReader inReader = null;
        try {
            inReader = new InputStreamReader(file.getInputStream());
            brReader = new BufferedReader(inReader);
            String rec = null;//一行
            String str;//一个单元格
            while ((rec = brReader.readLine()) != null) {
                Pattern pCells = Pattern.compile("("[^"]*("{2})*[^"]*")*[^,]*,");
                Matcher mCells = pCells.matcher(rec);
                List<String> cells = new ArrayList<>(); //每行记录一个list
                //读取每个单元格
                while (mCells.find()) {
                    str = mCells.group();
                    str = str.replaceAll("(?sm)"?([^"]*("{2})*[^"]*)"?.*,", "$1");
                    str = str.replaceAll("(?sm)("("))", "$2");
                    cells.add(str);
                }
                dataList.add(cells);
            }
        } catch (Exception e) {
        } finally {
            if (brReader != null) {
                try {
                    brReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (inReader != null) {
                try {
                    inReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return dataList;
    }

    解析pdf文件
    需要的jar包,配置到maven <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.6</version> </dependency> //demo public static void main(String[] args) { try (PDDocument document = PDDocument.load(new File("pdf文件路径"))) { document.getClass(); if(!document.isEncrypted()) { PDFTextStripperByArea stripper = new PDFTextStripperByArea(); stripper.setSortByPosition(true); PDFTextStripper tStripper = new PDFTextStripper(); String pdfFileInText = tStripper.getText(document); String[] lines = pdfFileInText.split("\r?\n"); for(String line : lines) { System.out.println(line); } } } catch (InvalidPasswordException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } }  

      

  • 相关阅读:
    hrbust 1840 (树状数组第k大) 删点使用
    20150211--Smarty2-02
    20150211--Smarty2-01
    20150210--Smarty1-02
    20150210--Smarty1-01
    20150209--JS巩固与加强6-02
    20150209--JS巩固与加强6-01
    20150207--JS巩固与加强5
    20150206--JS巩固与加强4-02
    20150206--JS巩固与加强4
  • 原文地址:https://www.cnblogs.com/dreammyone/p/9934628.html
Copyright © 2011-2022 走看看