import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Reads the first sheet of an Excel file (.xls or .xlsx) into a list of string
 * arrays, one array per non-empty row.
 *
 * <p>NOTE(review): the original header described this class as a reader for
 * large xlsx files, but the code builds {@link HSSFWorkbook} / {@link XSSFWorkbook}
 * objects rather than using a streaming reader - confirm this is acceptable for
 * the expected file sizes.
 */
public class ExcelRead {

    private static final Logger logger = LoggerFactory.getLogger(ExcelRead.class);

    /** File-name suffix that selects the Excel 2007+ (.xlsx) reader. */
    private static final String XLSX_SUFFIX = ".xlsx";

    /** Marker meaning "no empty row has been seen yet" (row index 0 is a valid row). */
    private static final int NO_EMPTY_ROW = -1;

    /** Maximum number of columns printed per row by {@link #main(String[])}. */
    private static final int MAX_PRINTED_COLUMNS = 6;

    /**
     * Opens a workbook, choosing the implementation from the file-name suffix.
     *
     * @param in           stream positioned at the start of the Excel file; not closed here
     * @param filename     path of the file; used for logging and as a fallback when
     *                     {@code fileFileName} is {@code null}
     * @param fileFileName file name or bare extension (for example {@code "report.xlsx"}
     *                     or {@code ".xlsx"}); a case-insensitive {@code .xlsx} suffix
     *                     selects {@link XSSFWorkbook}, anything else {@link HSSFWorkbook}
     * @return the opened workbook
     * @throws IOException if the stream cannot be read as a workbook
     */
    public static Workbook openWorkbook(InputStream in, String filename, String fileFileName) throws IOException {
        logger.debug("filename = {},fileFileName= {}", filename, fileFileName);
        String nameToInspect = fileFileName != null ? fileFileName : filename;
        if (hasXlsxSuffix(nameToInspect)) {
            return new XSSFWorkbook(in); // Excel 2007+
        }
        return new HSSFWorkbook(in); // Excel 97-2003
    }

    /** Returns true when {@code name} ends with ".xlsx", ignoring case; false for null. */
    private static boolean hasXlsxSuffix(String name) {
        if (name == null) {
            return false;
        }
        // regionMatches returns false (rather than throwing) when the offset is negative,
        // so names shorter than the suffix are handled without an extra length check.
        int offset = name.length() - XLSX_SUFFIX.length();
        return name.regionMatches(true, offset, XLSX_SUFFIX, 0, XLSX_SUFFIX.length());
    }

    /**
     * Reads every non-empty row of the first sheet.
     *
     * <p>The column count is taken from row 0. A row is skipped when it is missing or
     * when all of its cells (within that column count) are missing or blank. Reading
     * stops at the second of two consecutive skipped rows.
     *
     * <p>Within a returned array, a missing cell is left as {@code null}, a blank cell
     * is {@code ""}, and other cells are converted as described in
     * {@link #cellToString(Cell)}.
     *
     * <p>This method does not throw: any exception is logged and the rows collected so
     * far (possibly none) are returned.
     *
     * @param fileName     path of the Excel file to read
     * @param fileFileName file name or extension used to pick the workbook format,
     *                     see {@link #openWorkbook(InputStream, String, String)}
     * @return the rows read, never {@code null}
     */
    public static List<String[]> getExcelData(String fileName, String fileFileName) {
        logger.debug("fileName = {}", fileName);
        List<String[]> list = new ArrayList<String[]>();
        InputStream in = null;
        Workbook wb = null;
        try {
            in = new FileInputStream(fileName);
            wb = openWorkbook(in, fileName, fileFileName);
            logger.debug("openWorkbook = {}", wb);
            Sheet sheet = wb.getSheetAt(0); // only the first sheet is read

            Row headerRow = sheet.getRow(0);
            // getLastCellNum() is the last cell index plus one, or -1 for a row without cells.
            int totalCells = headerRow == null ? 0 : headerRow.getLastCellNum();
            if (totalCells <= 0) {
                logger.debug("first row has no cells, nothing to read");
                return list;
            }

            // Rows are fetched by index, so the bound must be the last row index,
            // not the number of physically present rows (which is smaller when there are gaps).
            int rowLimit = sheet.getLastRowNum() + 1;
            int previousEmptyRow = NO_EMPTY_ROW;
            for (int r = 0; r < rowLimit; r++) {
                logger.debug("curr line num: {}", r);
                String[] arr = readRow(sheet.getRow(r), totalCells);
                if (arr != null) {
                    list.add(arr);
                    logger.debug("=======END {} ========", r);
                    continue;
                }
                // Two consecutive empty rows are treated as the end of the data.
                if (previousEmptyRow != NO_EMPTY_ROW && previousEmptyRow + 1 == r) {
                    logger.info("end rows num : {}", r);
                    break;
                }
                previousEmptyRow = r;
            }
            logger.debug("=======list size {} ========", list.size());
            return list;
        } catch (Exception e) {
            // Best-effort contract: callers get whatever was read before the failure.
            logger.error("getExcelData() is error:", e);
            return list;
        } finally {
            closeQuietly(wb);
            closeQuietly(in);
        }
    }

    /**
     * Converts one row into an array of {@code totalCells} strings.
     *
     * @return the converted row, or {@code null} when the row is missing or every cell
     *         in the first {@code totalCells} columns is missing or blank
     */
    private static String[] readRow(Row row, int totalCells) {
        if (row == null) {
            return null;
        }
        String[] values = new String[totalCells];
        int emptyCells = 0;
        for (int c = 0; c < totalCells; c++) {
            Cell cell = row.getCell(c);
            if (cell == null) {
                emptyCells++; // the slot stays null, as callers have always received it
                continue;
            }
            if (cell.getCellType() == HSSFCell.CELL_TYPE_BLANK) {
                emptyCells++;
            }
            String cellValue = cellToString(cell);
            logger.debug("cellValue {}", cellValue);
            values[c] = cellValue;
        }
        return emptyCells == totalCells ? null : values;
    }

    /**
     * Converts a non-null cell to text according to its cell type.
     *
     * <ul>
     *   <li>numeric, date-formatted: {@code yyyy-MM-dd HH:mm:ss}</li>
     *   <li>numeric, otherwise: formatted with the pattern {@code "#"}, which keeps no
     *       fraction digits</li>
     *   <li>string / boolean: the cell value</li>
     *   <li>formula: the formula text, not its evaluated result</li>
     *   <li>blank: the empty string</li>
     *   <li>error / anything else: a fixed placeholder string</li>
     * </ul>
     */
    private static String cellToString(Cell cell) {
        switch (cell.getCellType()) {
            case HSSFCell.CELL_TYPE_NUMERIC:
                if (HSSFDateUtil.isCellDateFormatted(cell)) {
                    Date date = cell.getDateCellValue();
                    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                    return dateFormat.format(date);
                }
                return new DecimalFormat("#").format(cell.getNumericCellValue());
            case HSSFCell.CELL_TYPE_STRING:
                return cell.getStringCellValue();
            case HSSFCell.CELL_TYPE_BOOLEAN:
                return cell.getBooleanCellValue() + "";
            case HSSFCell.CELL_TYPE_FORMULA:
                return cell.getCellFormula() + "";
            case HSSFCell.CELL_TYPE_BLANK:
                return "";
            case HSSFCell.CELL_TYPE_ERROR:
                return "非法字符";
            default:
                return "未知类型";
        }
    }

    /** Closes the workbook if it was opened, logging (not propagating) any failure. */
    private static void closeQuietly(Workbook wb) {
        if (wb == null) {
            return;
        }
        try {
            wb.close();
        } catch (IOException e) {
            logger.error("getExcelData() close :", e);
        }
    }

    /** Closes the stream if it was opened, logging (not propagating) any failure. */
    private static void closeQuietly(InputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException e) {
            logger.error("getExcelData() close :", e);
        }
    }

    /**
     * Manual smoke test: prints up to the first six columns of every row after the
     * first one.
     *
     * @param args optional; {@code args[0]} is the path of the file to read,
     *             defaulting to {@code D:\test.xlsx}
     */
    public static void main(String[] args) throws Exception {
        String fileName = args.length > 0 ? args[0] : "D:\\test.xlsx";
        // The path itself carries the extension, so it doubles as the format hint.
        List<String[]> excelData = getExcelData(fileName, fileName);
        // Index 0 is skipped: it holds the first row read (typically the header).
        for (int i = 1; i < excelData.size(); i++) {
            System.out.println("第" + i + "行:");
            String[] cells = excelData.get(i);
            int shown = Math.min(cells.length, MAX_PRINTED_COLUMNS);
            for (int c = 0; c < shown; c++) {
                System.out.println(cells[c]);
            }
        }
    }
}