Atitit poi读取大文件内存溢出的解决
目录
1. poi提供了两种读取excel的方式,一种是类似xml的Dom, 1
2. POI官网上提到XSSF有三种读写excel,POI地址 1
3. 可以看到有三种模式:eventmodel方式,基于事件驱动,SAX的方式解析excel 2
4. /OfficeExcelPrj/src/officefile/ExampleEventUserModel.java 2
把整个文档load到内存中再解析,这种方法在org.apache.poi.xssf.usermodel包中,好用,但占内存;还有一种就是类似xml的Sax解析,每次只读取一条记录,这种方式占用的内存很小,但API用起来没那么方便,专门为读取大文件而设定的,这种方法在org.apache.poi.xssf.eventusermodel包中。
:http://poi.apache.org/spreadsheet/index.html。官网的图片:
1、eventmodel方式,基于事件驱动,SAX的方式解析excel(.xlsx是基于OOXML的),CPU和内存消耗非常低,但是只能读不能写
2、usermodel,就是我们一般使用的方式,这种方式可以读可以写,但是CPU和内存消耗非常大
3、SXSSF,POI3.8以后开始支持,这种方式只能写excel
package officefile;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.util.StringUtil;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import com.attilax.text.StringUtilsT55;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
//for big file
public class ExampleEventUserModel {
public void processOneSheet(String filename, String sheetname) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
// To look up the Sheet Name / Sheet Order / rID,
// you need to process the core Workbook stream.
// Normally it's of the form rId# or rSheet#
// String sheetname = "rId2";
InputStream sheet2 = r.getSheet(sheetname);
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
public void processAllSheets(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> sheets = r.getSheetsData();
while (sheets.hasNext()) {
System.out.println("Processing new sheet:\n");
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
System.out.println("");
}
}
public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader("com.sun.org.apache.xerces.internal.parsers.SAXParser");
ContentHandler handler = new SheetHandler(sst);
parser.setContentHandler(handler);
return parser;
}
/**
* See org.xml.sax.helpers.DefaultHandler javadocs
*/
private static class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
Map m = Maps.newLinkedHashMap();
String idx_cell;
CellAti cellx = new CellAti();
private SheetHandler(SharedStringsTable sst) {
this.sst = sst;
}
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
// c => cell
if (name.equals("c")) {
// Print the cell reference
// System.out.print(attributes.getValue("r") + " - ");
idx_cell = attributes.getValue("r");
// logger.info("Print the cell reference::" + attributes.getValue("r") + " - ");
// Figure out if the value is an index in the SST
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// Clear contents cache
lastContents = "";
}
public void endElement(String uri, String localName, String name) throws SAXException {
// Process the last contents as required.
// Do now, as characters() may be called more than once
if (nextIsString) {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
nextIsString = false;
}
// v => contents of a cell
// Output after we've seen the string contents
if (name.equals("v")) {
System.out.println(lastContents);
logger.info("endElement::" + lastContents);
int indexofDigit1 = StringUtilsT55.indexofDigit(idx_cell);
int rowindex = Integer.parseInt(idx_cell.substring(indexofDigit1));
String cellIdx = idx_cell.substring(0, indexofDigit1);
Map row;
try {
row = li.get(rowindex-1);
} catch (IndexOutOfBoundsException e) {
row=Maps.newLinkedHashMap();
row.put(cellIdx, lastContents);
row.put("row",rowindex);
li.add(row);
row = li.get(rowindex-1);
}
row.put(cellIdx, lastContents);
row.put("row",rowindex);
logger.info(row);
}
}
public void characters(char[] ch, int start, int length) throws SAXException {
lastContents += new String(ch, start, length);
logger.info("characters::" + lastContents);/// jeig haosyo mayong
// F2433 - G2433 - H2433 - I2433 - J2433 - K2433 - L2433 - M2433 -
// N2433 - O2433 - P2433 - Q2433 - R2433 - S2433 - T2433 - U2433 -
// V2433 - W2433 - X2433 - Y2433 - Z2433 - AA2433 - AB2433 - AC2433
// - AD2433 - AE2433 - AF2433 - AG2433 - AH2433 - AI2433 - AJ2433 -
// AK2433 - AL2433 - AM2433 - AN2433 - AO2433 - 2019-05-23
// 17:02:16,178$INFO,4728,td:main,lg:officefile.excelUtil2007ver,"characters::5391",officefile.ExampleEventUserModel$SheetHandler.characters.119
}
}
static org.apache.log4j.Logger logger = Logger.getLogger(excelUtil2007ver.class);
public static List<Map> li = Lists.newArrayList();
public static void main(String[] args) throws Exception {
String namesxls = "D:\\0db\\360手机助手导出的联系人.xlsx";
// String namesxls = "D:\\0db\\test.xlsx";
ExampleEventUserModel example = new ExampleEventUserModel();
System.out.println("11");
example.processOneSheet(namesxls, "rId1");
for (Map m : li) {
System.out.println(m);
}
// example.processAllSheets(args[0]);
}
}
{B=头, row=37142, D=猪, E=猪头, F=sh-totoKustm, AO=15066666666}
{B=雅雯, row=37143, D=竺, E=竺雅雯, F=sh-totoKustm, AO=15706266689}
{B=晨宇, row=37144, D=祝, E=祝晨宇, F=sh-totoKustm, AO=13511909092}
{B=成明, row=37145, D=祝, E=祝成明, F=sh-totoKustm, AO=18990934939}
{B=传武, row=37146, D=祝, E=祝传武, F=sh-totoKustm, AO=13883770474}
{B=玲, row=37147, D=祝, E=祝玲, F=sh-totoKustm, AO=13214156025}
POI读写大数据量EXCEL - tootwo2 - 博客园.html