package read.document;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Range;
import pers.mysql.DBUtil;
import pers.mysql.MysqlDao;
import pers.mysql.MysqlDaoImp;
public class WordReading {
/**
 * Program entry point: runs the Word import against a fixed document path.
 *
 * @param args command-line arguments; not read
 */
public static void main(String[] args) {
    readOnWord("*****.doc");
}
/**
 * Reads hot words and their explanations from a legacy Word ({@code .doc}) file and hands
 * each (word, explanation) pair to {@code MysqlDaoImp.addData}.
 *
 * <p>Only the third section of the document (index 2) is scanned, starting at its third
 * paragraph (index 2). A paragraph is classified by the font size of its first character run:
 * exactly 26 marks a hot word, anything smaller is explanation text, and consecutive
 * explanation paragraphs are concatenated. Paragraphs with a larger font size are skipped.
 * At most 24 pairs are stored; scanning also stops at the end of the section. A trailing
 * word that has no explanation is not stored.
 *
 * <p>Failures are reported on standard output rather than thrown: a path that is
 * {@code null} or does not end in {@code .doc}, a file that cannot be opened, a document
 * with fewer than three sections, or an I/O error while loading the document.
 *
 * @param filePath path of the {@code .doc} file to import
 */
public static void readOnWord(String filePath) {
    if (filePath == null || !filePath.endsWith(".doc")) {
        System.out.println("文件格式 error:not .doc");
        return;
    }

    // Document layout expected by this importer: section 0 is the cover,
    // section 1 the table of contents, section 2 the body text.
    final int bodySectionIndex = 2;
    // Font size that distinguishes a hot word (26) from its explanation (smaller, e.g. 18).
    final int wordFontSize = 26;
    // Maximum number of pairs written to the database.
    final int maxEntries = 24;
    // Index of the first paragraph to read inside the body section.
    final int firstParagraph = 2;

    // try-with-resources guarantees the stream is closed on every exit path.
    try (InputStream is = new FileInputStream(filePath)) {
        HWPFDocument doc = new HWPFDocument(is);
        Range text = doc.getRange(); // the whole document

        if (text.numSections() <= bodySectionIndex) {
            System.out.println("文档小节不足,未找到正文小节。");
            return;
        }

        // Word structure: text -> section -> paragraph -> character run (smallest unit).
        Range hotWord = text.getSection(bodySectionIndex);
        int paragraphCount = hotWord.numParagraphs();

        String word = "";
        StringBuilder explaining = new StringBuilder();
        // A pair is stored only once both halves have been collected.
        boolean wordReady = false;
        boolean explainReady = false;
        int stored = 0;
        int index = firstParagraph;

        while (stored < maxEntries && index < paragraphCount) {
            Range para = hotWord.getParagraph(index);
            int fontSize = para.getCharacterRun(0).getFontSize();

            if (fontSize == wordFontSize) {
                word = para.text();
                wordReady = true;
                index++;
            } else if (fontSize < wordFontSize) {
                // Gather the whole run of explanation paragraphs, stopping at the next
                // non-explanation paragraph or at the end of the section.
                while (fontSize < wordFontSize) {
                    explaining.append(para.text());
                    index++;
                    if (index >= paragraphCount) {
                        break;
                    }
                    para = hotWord.getParagraph(index);
                    fontSize = para.getCharacterRun(0).getFontSize();
                }
                explainReady = true;
            } else {
                // Neither a word nor an explanation: step over it so the scan always advances.
                index++;
            }

            if (wordReady && explainReady) {
                MysqlDaoImp.addData(word, explaining.toString());
                stored++;
                // Reset all state for the next pair.
                wordReady = false;
                explainReady = false;
                word = "";
                explaining.setLength(0);
            }
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        System.out.println("文件打开失败。");
    } catch (IOException e) {
        e.printStackTrace();
        System.out.println("IO错误。");
    }
}
...................................................