zoukankan      html  css  js  c++  java
  • Atitit poi读取大文件内存溢出的解决 目录 1. poi提供了两种读取excel的方式,一种是类似xml的Dom, 1 2. POI官网上提到XSSF有三种读写excel,POI地址 1 3

    Atitit poi读取大文件内存溢出的解决

     

    目录

    1. poi提供了两种读取excel的方式,一种是类似xml的Dom, 1

    2. POI官网上提到XSSF有三种读写excel,POI地址 1

    3. 可以看到有三种模式:eventmodel方式,基于事件驱动,SAX的方式解析excel 2

    4. /OfficeExcelPrj/src/officefile/ExampleEventUserModel.java 2

    5. ref 11

     

    1. poi提供了两种读取excel的方式,一种是类似xml的Dom,

    把整个文档load到内存中再解析,这种方法在org.apache.poi.xssf.usermodel包中,好用,但占内存;还有一种就是类似xml的Sax解析,每次只读取一条记录,这种方式占用的内存很小,但API用起来没那么方便,专门为读取大文件而设定的,这种方法在org.apache.poi.xssf.eventusermodel包中。

     

     

    1. POI官网上提到XSSF有三种读写excel,POI地址

    :http://poi.apache.org/spreadsheet/index.html。官网的图片:

    1. 可以看到有三种模式:eventmodel方式,基于事件驱动,SAX的方式解析excel

    1、eventmodel方式,基于事件驱动,SAX的方式解析excel(.xlsx是基于OOXML的),CPU和内存消耗非常低,但是只能读不能写

    2、usermodel,就是我们一般使用的方式,这种方式可以读可以写,但是CPU和内存消耗非常大

    3、SXSSF,POI3.8以后开始支持,这种方式只能写excel

     

     

    1. /OfficeExcelPrj/src/officefile/ExampleEventUserModel.java

     

    package officefile;

     

    import java.io.InputStream;

    import java.util.Iterator;

    import java.util.List;

    import java.util.Map;

     

    import org.apache.poi.xssf.eventusermodel.XSSFReader;

    import org.apache.poi.xssf.model.SharedStringsTable;

    import org.apache.poi.xssf.usermodel.XSSFRichTextString;

    import org.apache.commons.lang3.StringUtils;

    import org.apache.log4j.Logger;

    import org.apache.poi.openxml4j.opc.OPCPackage;

    import org.apache.poi.util.StringUtil;

    import org.xml.sax.Attributes;

    import org.xml.sax.ContentHandler;

    import org.xml.sax.InputSource;

    import org.xml.sax.SAXException;

    import org.xml.sax.XMLReader;

    import org.xml.sax.helpers.DefaultHandler;

    import org.xml.sax.helpers.XMLReaderFactory;

     

    import com.attilax.text.StringUtilsT55;

    import com.google.common.collect.Lists;

    import com.google.common.collect.Maps;

     

    //for big file

    public class ExampleEventUserModel {

     

    public void processOneSheet(String filename, String sheetname) throws Exception {

    OPCPackage pkg = OPCPackage.open(filename);

    XSSFReader r = new XSSFReader(pkg);

    SharedStringsTable sst = r.getSharedStringsTable();

     

    XMLReader parser = fetchSheetParser(sst);

     

    // To look up the Sheet Name / Sheet Order / rID,

    // you need to process the core Workbook stream.

    // Normally it's of the form rId# or rSheet#

    // String sheetname = "rId2";

    InputStream sheet2 = r.getSheet(sheetname);

    InputSource sheetSource = new InputSource(sheet2);

    parser.parse(sheetSource);

    sheet2.close();

    }

     

    public void processAllSheets(String filename) throws Exception {

    OPCPackage pkg = OPCPackage.open(filename);

    XSSFReader r = new XSSFReader(pkg);

    SharedStringsTable sst = r.getSharedStringsTable();

     

    XMLReader parser = fetchSheetParser(sst);

     

    Iterator<InputStream> sheets = r.getSheetsData();

    while (sheets.hasNext()) {

    System.out.println("Processing new sheet:\n");

    InputStream sheet = sheets.next();

    InputSource sheetSource = new InputSource(sheet);

    parser.parse(sheetSource);

    sheet.close();

    System.out.println("");

    }

    }

     

    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {

    XMLReader parser = XMLReaderFactory.createXMLReader("com.sun.org.apache.xerces.internal.parsers.SAXParser");

    ContentHandler handler = new SheetHandler(sst);

    parser.setContentHandler(handler);

    return parser;

    }

     

    /**

     * See org.xml.sax.helpers.DefaultHandler javadocs

     */

    private static class SheetHandler extends DefaultHandler {

    private SharedStringsTable sst;

    private String lastContents;

    private boolean nextIsString;

    Map m = Maps.newLinkedHashMap();

    String idx_cell;

    CellAti cellx = new CellAti();

     

    private SheetHandler(SharedStringsTable sst) {

    this.sst = sst;

    }

     

    public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {

    // c => cell

    if (name.equals("c")) {

    // Print the cell reference

    // System.out.print(attributes.getValue("r") + " - ");

    idx_cell = attributes.getValue("r");

    // logger.info("Print the cell reference::" + attributes.getValue("r") + " - ");

    // Figure out if the value is an index in the SST

    String cellType = attributes.getValue("t");

    if (cellType != null && cellType.equals("s")) {

    nextIsString = true;

    } else {

    nextIsString = false;

    }

    }

    // Clear contents cache

    lastContents = "";

    }

     

    public void endElement(String uri, String localName, String name) throws SAXException {

    // Process the last contents as required.

    // Do now, as characters() may be called more than once

    if (nextIsString) {

    int idx = Integer.parseInt(lastContents);

    lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();

    nextIsString = false;

    }

     

    // v => contents of a cell

    // Output after we've seen the string contents

    if (name.equals("v")) {

    System.out.println(lastContents);

    logger.info("endElement::" + lastContents);

     

    int indexofDigit1 = StringUtilsT55.indexofDigit(idx_cell);

    int rowindex = Integer.parseInt(idx_cell.substring(indexofDigit1));

    String cellIdx = idx_cell.substring(0, indexofDigit1);

    Map row;

    try {

    row = li.get(rowindex-1);

    } catch (IndexOutOfBoundsException e) {

    row=Maps.newLinkedHashMap();

    row.put(cellIdx, lastContents);

    row.put("row",rowindex);

    li.add(row);

    row = li.get(rowindex-1);

    }

     

    row.put(cellIdx, lastContents);

    row.put("row",rowindex);

    logger.info(row);

     

    }

    }

     

    public void characters(char[] ch, int start, int length) throws SAXException {

    lastContents += new String(ch, start, length);

    logger.info("characters::" + lastContents);/// jeig haosyo mayong

    // F2433 - G2433 - H2433 - I2433 - J2433 - K2433 - L2433 - M2433 -

    // N2433 - O2433 - P2433 - Q2433 - R2433 - S2433 - T2433 - U2433 -

    // V2433 - W2433 - X2433 - Y2433 - Z2433 - AA2433 - AB2433 - AC2433

    // - AD2433 - AE2433 - AF2433 - AG2433 - AH2433 - AI2433 - AJ2433 -

    // AK2433 - AL2433 - AM2433 - AN2433 - AO2433 - 2019-05-23

    // 17:02:16,178$INFO,4728,td:main,lg:officefile.excelUtil2007ver,"characters::5391",officefile.ExampleEventUserModel$SheetHandler.characters.119

     

    }

    }

     

    static org.apache.log4j.Logger logger = Logger.getLogger(excelUtil2007ver.class);

    public static List<Map> li = Lists.newArrayList();

     

    public static void main(String[] args) throws Exception {

     String namesxls = "D:\\0db\\360手机助手导出的联系人.xlsx";

    // String namesxls = "D:\\0db\\test.xlsx";

    ExampleEventUserModel example = new ExampleEventUserModel();

    System.out.println("11");

    example.processOneSheet(namesxls, "rId1");

    for (Map m : li) {

    System.out.println(m);

    }

    // example.processAllSheets(args[0]);

    }

    }

     

     

     

    {B=头, row=37142, D=猪, E=猪头, F=sh-totoKustm, AO=15066666666}

    {B=雅雯, row=37143, D=竺, E=竺雅雯, F=sh-totoKustm, AO=15706266689}

    {B=晨宇, row=37144, D=祝, E=祝晨宇, F=sh-totoKustm, AO=13511909092}

    {B=成明, row=37145, D=祝, E=祝成明, F=sh-totoKustm, AO=18990934939}

    {B=传武, row=37146, D=祝, E=祝传武, F=sh-totoKustm, AO=13883770474}

    {B=玲, row=37147, D=祝, E=祝玲, F=sh-totoKustm, AO=13214156025}

    1. ref

    POI读写大数据量EXCEL - tootwo2 - 博客园.html

  • 相关阅读:
    一次访问ORACLE数据字典的优化
    利用分区优化SQL
    转行程序员的故事
    有一个苹果
    ubuntu10.04启动后出现grub rescue 模式
    source 命令 && . 命令
    ubuntu10.04启动后出现grub rescue 模式
    Android 各国语言缩写各国语言简称 .
    cpu的核心数、线程数、处理器的位数、操作系统的位数、能够支持最大内存 小结
    产品生产的各个阶段:DV,EV,PV ········是什么意思
  • 原文地址:https://www.cnblogs.com/attilax/p/15197288.html
Copyright © 2011-2022 走看看