package com;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
//import java.io.FileInputStream;
//import java.io.FileNotFoundException;
//import java.io.IOException;
//import java.util.HashMap;
//import java.util.Iterator;
//import java.util.Map;
//
//import org.apache.poi.hwpf.HWPFDocument;
//import org.apache.poi.hwpf.model.FieldsDocumentPart;
//import org.apache.poi.hwpf.usermodel.Field;
//import org.apache.poi.hwpf.usermodel.Fields;
//import org.apache.poi.hwpf.usermodel.Range;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
/**
 * Command-line demo that extracts the plain text of a PDF file with PDFBox
 * and prints it to standard output between begin/end marker lines.
 *
 * <p>A disabled variant that extracts text from a Word document through
 * POI's {@code ExtractorFactory} is kept inside {@link #main(String[])} for
 * reference.
 */
public class Test {

    /** PDF that is read when no path is supplied on the command line. */
    private static final String DEFAULT_PDF_PATH = "D://知识积累//EL.pdf";

    /**
     * Parses a PDF and prints its text content.
     *
     * <p>Any I/O failure (missing file, unreadable or malformed PDF) is
     * reported on standard error and ends the run; nothing is printed to
     * standard output in that case.
     *
     * @param args optional; when present, {@code args[0]} is the path of the
     *             PDF to read, otherwise {@link #DEFAULT_PDF_PATH} is used
     */
    public static void main(String[] args) {
/*
try {
// Word format
String path="D:\\workspace\\MyPlatFileNew\\web\\content\\kent\\a6\\uploadattach\\iplat4j01361351007003_20130220170327.doc";
System.out.println("========"+path);
File inputFile = new File(path);
POITextExtractor extractor = ExtractorFactory.createExtractor(inputFile);
System.out.println("Document Text: ");
System.out.println("====================");
System.out.println(extractor.getText());
System.out.println("====================");
}catch (Exception ex) {
ex.printStackTrace();
}*/
        // PDF format
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;

        FileInputStream fis = null;
        try {
            // One try block for the whole pipeline: a failure in any step
            // must stop the following steps instead of letting them run on
            // null references.
            fis = new FileInputStream(path);
            PDFParser parser = new PDFParser(fis);
            parser.parse();
            try {
                PDFTextStripper stripper = new PDFTextStripper();
                String text = stripper.getText(parser.getPDDocument());
                System.out.println("----------begin------------");
                System.out.println(text);
                System.out.println("-----------end-----------");
            } finally {
                // Release the parsed document once parsing has succeeded.
                // NOTE(review): relies on getPDDocument() wrapping the same
                // document object that getDocument() returns, as in
                // PDFBox 1.x - confirm if the PDFBox version changes.
                parser.getDocument().close();
            }
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so a missing file
            // ends up here as well.
            System.err.println("Failed to extract text from " + path);
            e.printStackTrace();
        } finally {
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
// Required JARs: pdfbox-1.7.1.jar, poi-3.9-20121203.jar, poi-ooxml-3.9-20121203.jar.