// Wrote a small program today, following the textbook, that parses PDF documents.
import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import org.pdfbox.pdmodel.PDDocument; import org.pdfbox.util.PDFTextStripper; public class ExtractorPDF { /** * @param args */ public static String getText(String file) //throws Exception {String pdfFile=file; PDDocument document=null; String s=null; try { //装载文件 document=PDDocument.load(pdfFile); //用PDFTextStripper来提取 文件 PDFTextStripper stripper=new PDFTextStripper(); s=stripper.getText(document); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (document!=null) try { document.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } return s; } public static void toTextFile(String file,String txt) { String pdfFile=file; PDDocument document=null; try { //加载文件 document=PDDocument.load(pdfFile); //用PDFTextStripper提取文件 PDFTextStripper stripper=new PDFTextStripper(); PrintWriter pw=new PrintWriter(new FileWriter(txt)); stripper.writeText(document, pw); pw.close(); System.out.println("成功写入文本文件"+txt); } catch (IOException e) { // TODO Auto-generated catch block System.out.println("文本写入失败"); e.printStackTrace(); } finally { if(document!=null) {try { document.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); }} } } public static void main(String[] args) { // TODO Auto-generated method stub String s=getText("G:/学习资料/软件大赛学习资料/网上淘宝.pdf "); System.out.println(s); toTextFile("G:/学习资料/软件大赛学习资料/网上淘宝.pdf ","G:/Lucene/PDF.txt"); } }