zoukankan      html  css  js  c++  java
  • java读取各种类型文件

    用到的几个 jar 包(以“/”分隔):
    
    bcmail-jdk14-132.jar/bcprov-jdk14-132.jar/checkstyle-all-4.2.jar/FontBox-0.1.0-dev.jar/lucene-core-2.0.0.jar/PDFBox-0.7.3.jar/poi-3.0-alpha3-20061212.jar/poi-contrib-3.0-alpha3-20061212.jar/poi-scratchpad-3.0-alpha3-20061212.jar
    
    import java.io.BufferedReader;
    import java.io.FileInputStream;
    import java.io.FileReader;
    import java.io.IOException;
    import org.apache.poi.hslf.HSLFSlideShow;
    import org.apache.poi.hslf.model.Slide;
    import org.apache.poi.hslf.model.TextRun;
    import org.apache.poi.hslf.usermodel.SlideShow;
    import org.apache.poi.hssf.usermodel.HSSFCell;
    import org.apache.poi.hssf.usermodel.HSSFRow;
    import org.apache.poi.hssf.usermodel.HSSFSheet;
    import org.apache.poi.hssf.usermodel.HSSFWorkbook;
    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.usermodel.Paragraph;
    import org.apache.poi.hwpf.usermodel.Range;
    import org.pdfbox.pdfparser.PDFParser;
    import org.pdfbox.util.PDFTextStripper;
    
    public class Test {
    
    /**
     * Prints the string form of a value on standard output, followed by a
     * line break. Shorthand used by the demo code in this class.
     *
     * @param obj value to print; {@code null} is printed as "null"
     */
    public static void p(Object obj) {
        String text = String.valueOf(obj);
        System.out.println(text);
    }
    
    /**
     * Demo entry point: extracts the text of one presentation file with
     * {@code readPpt} and prints it.
     *
     * @param args optional; {@code args[0]} is the path of the file to read.
     *             When no argument is given the sample path "src/1.dps" is
     *             used, as before.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : "src/1.dps";
        try {
            p(readPpt(path));
        } catch (Exception e) {
            // Demo code: report the failure and exit normally.
            e.printStackTrace();
        }
    }
    /**
     * Extracts the text of a PowerPoint presentation.
     *
     * <p>For every slide, the text of each text run is appended, followed by
     * the slide title when the slide has one. Reading is best-effort: if
     * anything fails, the exception is printed on standard output and the text
     * collected so far is returned.
     *
     * @param path full path of the presentation file
     * @return the collected text with leading/trailing whitespace removed;
     *         empty when nothing could be read
     * @throws Exception declared for callers; failures raised while reading
     *                   are caught and printed inside this method
     */
    public static String readPpt(String path) throws Exception {
        StringBuffer content = new StringBuffer("");
        try {
            SlideShow ss = new SlideShow(new HSLFSlideShow(path));
            Slide[] slides = ss.getSlides();
            for (int i = 0; i < slides.length; i++) {
                TextRun[] runs = slides[i].getTextRuns();
                for (int j = 0; j < runs.length; j++) {
                    content.append(runs[j].getText());
                }
                // getTitle() can return null for a slide without a title;
                // appending null would add the literal text "null".
                // NOTE(review): the title may also already be among the text
                // runs above - confirm whether appending it again is wanted.
                String title = slides[i].getTitle();
                if (title != null) {
                    content.append(title);
                }
            }
        } catch (Exception ex) {
            System.out.println(ex.toString());
        }
        return content.toString().trim();
    }
    // 读取xls
    public static String readXls(String path) throws Exception {
       StringBuffer content = new StringBuffer("");// 文档内容
       HSSFWorkbook workbook = new HSSFWorkbook(new FileInputStream(path));
       int sheetCount = workbook.getNumberOfSheets();// excel几张表
       for (int i = 0; i < sheetCount; i++) {// 遍历excel表
        HSSFSheet sheet = workbook.getSheetAt(i);// 对excel的第一个表引用
        int rowCount = sheet.getLastRowNum();// 取得最后一行的下标
        for (int j = 0; j < rowCount; j++) {// 循环每一行
         HSSFRow row = sheet.getRow(j);// 引用行
         if (row == null) {
          continue;
         } else {
          short cellNum = row.getLastCellNum();
          for (short m = 0; m < cellNum; m++) {
           HSSFCell cell = row.getCell(m);// 引用行中的一个单元格
           if (cell != null) {
            int cellType = cell.getCellType();
            // CELL_TYPE_NUMERIC 0 数字
            // CELL_TYPE_STRING 1 字符串
            // CELL_TYPE_FORMULA 2 公式
            // CELL_TYPE_BLANK 3 空格
            // CELL_TYPE_BOOLEAN 4 布尔值
            // CELL_TYPE_ERROR 5 错误
            switch (cellType) {
            // 单元格类型为数字
            case HSSFCell.CELL_TYPE_NUMERIC:
             // 取数字单元格的值
             double d = cell.getNumericCellValue();
             content.append(String.valueOf(d) + "   ");
             break;
            // 单元格类型为字符串
            case HSSFCell.CELL_TYPE_STRING:
             String str = cell.getStringCellValue().trim();
             if (!str.equals("")) {
              content.append(str + "   ");
             }
             break;
            // 单元格类型为公式
            case HSSFCell.CELL_TYPE_FORMULA:
             // 不读取公式
             // String formula = cell.getCellFormula();
             // content = content + formula+" ";
             break;
            // 单元格类型为空白
            case HSSFCell.CELL_TYPE_BLANK:
             break;
            // 单元格类型为布尔值
            case HSSFCell.CELL_TYPE_BOOLEAN:
             // boolean bool = cell.getBooleanCellValue();
             // content = content + bool+" ";
             break;
            // 单元格类型为错误
            case HSSFCell.CELL_TYPE_ERROR:
             // byte errorCode = cell.getErrorCellValue();
             // content = content + errorCode+" ";
             break;
            default:
             break;
            }
           } else {
            // content = content + "..." +" ";//没有数据的单元格使用...填充
           }
          }
         }
         content.append("
    ");
        }
       }
       return content.toString().trim();
    }
    
    /**
     * Extracts the text of a PDF file.
     *
     * @param path full path of the PDF file
     * @return the extracted text with leading/trailing whitespace removed
     * @throws Exception if the file cannot be opened, parsed or stripped
     */
    public static String readPdf(String path) throws Exception {
        StringBuffer content = new StringBuffer("");
        FileInputStream fis = new FileInputStream(path);
        try {
            PDFParser parser = new PDFParser(fis);
            parser.parse();
            try {
                PDFTextStripper stripper = new PDFTextStripper();
                content.append(stripper.getText(parser.getPDDocument()));
            } finally {
                // Close the parsed document so its resources are released
                // even when text extraction fails.
                parser.getDocument().close();
            }
        } finally {
            // Release the file handle on every path, not only on success.
            fis.close();
        }
        return content.toString().trim();
    }
    
    /**
     * Extracts the text of a Word (.doc) document. Only text is read; embedded
     * images are not handled.
     *
     * <p>The text of every paragraph is appended in document order without
     * any additional separator.
     *
     * @param path full path of the document file
     * @return the concatenated paragraph text with leading/trailing whitespace
     *         removed
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String readWord(String path) throws Exception {
        StringBuffer content = new StringBuffer("");
        FileInputStream in = new FileInputStream(path);
        try {
            HWPFDocument doc = new HWPFDocument(in);
            Range range = doc.getRange();
            int paragraphCount = range.numParagraphs();
            for (int i = 0; i < paragraphCount; i++) {
                Paragraph paragraph = range.getParagraph(i);
                content.append(paragraph.text());
            }
        } finally {
            // Release the file handle even when parsing fails.
            in.close();
        }
        return content.toString().trim();
    }
    
    // 读取text
    public static String readTxt(String path) {
       StringBuffer content = new StringBuffer("");// 文档内容
       try {
        FileReader reader = new FileReader(path);
        BufferedReader br = new BufferedReader(reader);
        String s1 = null;
    
        while ((s1 = br.readLine()) != null) {
         content.append(s1 + "
    ");
        }
        br.close();
        reader.close();
       } catch (IOException e) {
        e.printStackTrace();
       }
       return content.toString().trim();
    }
    
    }
    

      

    来自于:https://www.cnblogs.com/candl/p/3592649.html

    读取xls表格:https://juejin.im/entry/5a5f03e76fb9a01cb42c643e

    读取excel:http://www.voidcn.com/article/p-akhurrpc-bbh.html

  • 相关阅读:
    oracle 进阶之model子句
    SiteMesh的使用--笔记
    JZ49:把字符串转换成整数
    面试题45:把数组排成最小的数
    面试题43:1~n整数中1出现的次数
    面试题42:连续子数组的最大和
    面试题41:数据中的中位数
    面试题40:最小的k个数
    面试题39:数组中出现次数超过一半的数字
    面试题38:字符串的排列
  • 原文地址:https://www.cnblogs.com/Pjson/p/10266594.html
Copyright © 2011-2022 走看看