public class pdfAnalysis {
/**
* @throws IOException
* @param从网络上下载PDF,截取PDF字符串,
*/
public static void main(String[] args) throws IOException {
// 下载的连接 下载下来的名字 下载下来的路径
// pdfAnalysis.downLoadByUrl("", "KK.pdf", "F:/");
// 读取文件
pdfAnalysis pdf = new pdfAnalysis();
// 读取文件
String pdfName = "F:\CC.pdf";
// 解析PDF里的值 存入变量pdf_Body
String pdf_Body = pdf.readFileOfPDF(pdfName);
//System.out.println(pdf_Body);
/* String str = pdf_Body.substring(pdf_Body.indexOf("Arrival"), pdf_Body.indexOf("Payment Details"));
String str1 = str.substring(str.indexOf("H ("));
String [] pp ={"Monday","Tuesday","Wednesday","Thursday","Friday","Saturday" ,"Sunday" };
for(String sto:pp){
if(str1.contains(sto)){
String result = str1.substring(str1.indexOf(sto));
//System.out.println(result);
//System.out.println(result.length());
String result2 = result.substring(0,result.indexOf(","));
String result3 = result2.trim();
System.out.println("我要的时间:"+result3+"我是"+pdfName+"文件");
}
}*/
/*if(str1.contains("Monday")||str1.contains("Tuesday")||
str1.contains("Wednesday")||str1.contains("Thursday")||
str1.contains("Friday")||str1.contains("Saturday")||str1.contains("Sunday")){
}*/
// System.out.println(str1);
// 取出人名值
String name_Temp = pdf_Body.substring(pdf_Body.indexOf("Arrive"), pdf_Body.indexOf("passenger details"));
// System.out.println(str);
String name_Temp1 = null;
String result_name = null;
List<String> list_Name = new ArrayList<>();
for (int i = 1; i < name_Temp.length(); i++) {
if (name_Temp.contains(i + ".")) {
name_Temp1 = name_Temp.substring(name_Temp.indexOf(i + "."));
result_name = name_Temp1.substring(name_Temp1.indexOf(i + ".") + 3,
name_Temp1.indexOf("Seat Number Services"));
list_Name.add(result_name);
}
// System.out.println(add);
// System.out.println(str2);
if (name_Temp1.equals("null")) {
continue;
}
}
for (String i : list_Name) {
System.out.println("所有的人名:" + i);
}*/
if (pdfAnalysis.infile != null) {
pdfAnalysis.infile.close();
System.out.println("我要准备关闭PDF文档了");
}
}
/**
 * Counts how many times a pattern is found in a text.
 *
 * <p>{@code temp} is compiled as a regular expression, and matches are counted the way
 * successive {@link Matcher#find()} calls report them (each search resumes after the
 * previous match).
 *
 * @param srcText the text to search
 * @param temp the regular expression to look for
 * @return the number of matches found in {@code srcText}
 */
public static int appearNumber(String srcText, String temp) {
    Matcher matcher = Pattern.compile(temp).matcher(srcText);
    int occurrences = 0;
    for (boolean found = matcher.find(); found; found = matcher.find()) {
        occurrences += 1;
    }
    return occurrences;
}
/**
 * Input stream of the PDF most recently opened by {@link #readFileOfPDF(String)}.
 * It stays public and static because {@code main} checks and closes it after reading.
 */
public static FileInputStream infile = null;

/**
 * Reads a PDF file and returns the text extracted from it.
 *
 * <p>Failures while opening, parsing or extracting are reported on standard output and
 * are not rethrown. Both the parsed document and the input stream are closed before
 * returning.
 *
 * @param pdfName path of the PDF file to read
 * @return the extracted text, or {@code null} if the file could not be read or parsed
 * @throws IOException declared in the signature; failures inside this method are caught
 *         and reported rather than propagated
 */
public String readFileOfPDF(String pdfName) throws IOException {
    String context = null;
    File file = new File(pdfName); // used for the absolute path in messages
    PDDocument pdfdocument = null;
    try {
        infile = new FileInputStream(pdfName); // open the file as an input stream
        // Create a parser over the stream and parse the PDF.
        PDFParser parser = new PDFParser(infile);
        parser.parse();
        // Obtain the parsed document and strip its text.
        pdfdocument = parser.getPDDocument();
        PDFTextStripper stripper = new PDFTextStripper();
        context = stripper.getText(pdfdocument);
        System.out.println("PDF文件" + file.getAbsolutePath() + "的文本内容如下:");
    } catch (Exception e) {
        System.out.println("读取PDF文件" + file.getAbsolutePath() + "失败!" + e.getMessage());
    } finally {
        // The parsed document must be closed explicitly; closing the stream alone is not enough.
        if (pdfdocument != null) {
            try {
                pdfdocument.close();
            } catch (IOException e) {
                System.out.println("关闭PDF文档" + file.getAbsolutePath() + "失败!" + e.getMessage());
            }
        }
        if (infile != null) {
            try {
                infile.close();
            } catch (IOException e1) {
                // Report instead of silently swallowing; the extracted text is still returned.
                System.out.println("关闭PDF文件流" + file.getAbsolutePath() + "失败!" + e1.getMessage());
            }
        }
    }
    return context;
}