zoukankan      html  css  js  c++  java
  • 解析csv、pdf文件

    /**
     * 解析csv文件 到一个list中
     * 每个单元个为一个String类型记录,每一行为一个list。
     * 再将所有的行放到一个总list中
     *
     * @return
     * @throws IOException
     */
    public static List<List<String>> importCsv(MultipartFile file) {
        List<List<String>> dataList = new ArrayList<>();
        BufferedReader brReader = null;
        InputStreamReader inReader = null;
        try {
            inReader = new InputStreamReader(file.getInputStream());
            brReader = new BufferedReader(inReader);
            String rec = null;//一行
            String str;//一个单元格
            while ((rec = brReader.readLine()) != null) {
                Pattern pCells = Pattern.compile("("[^"]*("{2})*[^"]*")*[^,]*,");
                Matcher mCells = pCells.matcher(rec);
                List<String> cells = new ArrayList<>(); //每行记录一个list
                //读取每个单元格
                while (mCells.find()) {
                    str = mCells.group();
                    str = str.replaceAll("(?sm)"?([^"]*("{2})*[^"]*)"?.*,", "$1");
                    str = str.replaceAll("(?sm)("("))", "$2");
                    cells.add(str);
                }
                dataList.add(cells);
            }
        } catch (Exception e) {
        } finally {
            if (brReader != null) {
                try {
                    brReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (inReader != null) {
                try {
                    inReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return dataList;
    }

    解析pdf文件
    需要的jar包,配置到maven <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.6</version> </dependency> //demo public static void main(String[] args) { try (PDDocument document = PDDocument.load(new File("pdf文件路径"))) { document.getClass(); if(!document.isEncrypted()) { PDFTextStripperByArea stripper = new PDFTextStripperByArea(); stripper.setSortByPosition(true); PDFTextStripper tStripper = new PDFTextStripper(); String pdfFileInText = tStripper.getText(document); String[] lines = pdfFileInText.split("\r?\n"); for(String line : lines) { System.out.println(line); } } } catch (InvalidPasswordException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } }  

      

  • 相关阅读:
    Apache ECharts
    navicate10破解版 in win
    mysql5.7.23免安装配置说明in win7
    ubuntu安装intellij IDEA ultimate破解
    java1015 leetcode1 twosum approach2 Map-doc generic PESC type argument/(? extends | super %bounded) parameterized type
    笔试题学习(dp,重叠子问题,卡特兰数,手电过桥,最长公共子序列)
    selfish mining:block‐withholding attacks
    矿工找到block的概率分布函数和函数图像
    proof of reserves and proof of liabilities and fractional reserve
    soft fork and hard fork
  • 原文地址:https://www.cnblogs.com/dreammyone/p/9934628.html
Copyright © 2011-2022 走看看