该程序用于读取 Word 文档中的文字内容；艺术字和图片中的文字无法读取。
先在 IDEA 中创建 Maven 项目
在pom.xml添加以下依赖
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.17</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.17</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>3.17</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.17</version> </dependency>
代码:
package com.gong; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.hwpf.extractor.WordExtractor; //import org.apache.poi.ooxml.POIXMLDocument; //import org.apache.poi.ooxml.extractor.POIXMLTextExtractor; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; public class Word { public static String ReadDoc(String path) throws IOException { String resullt = ""; //首先判断文件中的是doc/docx try { if (path.endsWith(".doc")) { InputStream is = new FileInputStream(new File(path)); WordExtractor re = new WordExtractor(is); resullt = re.getText(); re.close(); } else if (path.endsWith(".docx")) { OPCPackage opcPackage = POIXMLDocument.openPackage(path); POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage); resullt = extractor.getText(); extractor.close(); } else { System.out.println("此文件不是word文件"); } } catch(Exception e){ e.printStackTrace(); } return resullt; } public static void main(String[] args) throws IOException { String path="E:\datas\学习.docx"; String result=ReadDoc(path); System.out.println(result); } }
运行程序后，终端会打印出 Word 文档的内容。
此文参考了:https://blog.csdn.net/lq18894033018/article/details/97934901