zoukankan      html  css  js  c++  java
  • JAVA

    package read.document;
    
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.InputStream;
    import java.sql.Connection;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.usermodel.CharacterRun;
    import org.apache.poi.hwpf.usermodel.Range;
    
    import pers.mysql.DBUtil;
    import pers.mysql.MysqlDao;
    import pers.mysql.MysqlDaoImp;
    
    public class WordReading {
    
        /**
         * Program entry point: imports the hot words of a single .doc file.
         *
         * @param args command-line arguments (not used)
         */
        public static void main(String[] args) {
            // The path is a placeholder; replace it with the real document location.
            readOnWord("*****.doc");
        }
    
        /**
         * Reads word/explanation pairs from a legacy Word {@code .doc} file and hands
         * each completed pair to {@code MysqlDaoImp.addData}.
         *
         * <p>Document breakdown used here: document -> section -> paragraph ->
         * character run. Only the section at index 2 is scanned (per the original
         * author's note: 0 = cover, 1 = table of contents, 2 = body text), starting
         * at its paragraph with index 2. A paragraph whose first character run
         * reports a font size of 26 is taken as a word; the consecutive paragraphs
         * with a smaller size that follow are concatenated into one explanation
         * (the original author's note gives 18 as the explanation size).
         *
         * <p>At most 24 pairs are stored. Scanning also stops at the end of the
         * section. Problems are reported on standard output instead of being thrown:
         * a wrong or missing file name, a file that cannot be opened, a document
         * with fewer than three sections, and I/O errors while loading.
         *
         * <p>NOTE(review): explanation text met before any word is kept and gets
         * paired with the next word, exactly as the previous implementation did;
         * confirm this is intended.
         *
         * @param filePath path of the document; must end with {@code .doc}
         */
        public static void readOnWord(String filePath) {

            // Guard clause: a null path used to throw a NullPointerException here.
            if (filePath == null || !filePath.endsWith(".doc")) {
                System.out.println("文件格式 error:not .doc");
                return;
            }

            final int wordFontSize = 26;  // font size that marks a hot-word paragraph
            final int contentSection = 2; // index of the section that is scanned
            final int count = 24;         // maximum number of pairs written to the database

            // try-with-resources closes the stream on every path; a failed open is
            // reported and the method stops instead of passing null to HWPFDocument.
            try (InputStream is = new FileInputStream(filePath)) {

                // Load the .doc document
                HWPFDocument doc = new HWPFDocument(is);

                Range text = doc.getRange(); // the whole document

                // 1. Pick the content section, if the document has one
                if (text.numSections() <= contentSection) {
                    System.out.println("文档小节不足,未找到正文小节。");
                    return;
                }
                Range hotWord = text.getSection(contentSection);
                int paragraphTotal = hotWord.numParagraphs();

                // 2. Paragraph processing: track the current word and its explanation
                String word = "";
                StringBuilder explaining = new StringBuilder();
                boolean wordOK = false;
                boolean explainOK = false; // both flags set => the pair can be stored

                int stored = 0; // pairs written so far
                int begin = 2;  // index of the paragraph being read

                while (stored < count && begin < paragraphTotal) {
                    Range para = hotWord.getParagraph(begin);
                    int fontSize = para.getCharacterRun(0).getFontSize();

                    if (fontSize == wordFontSize) {
                        word = para.text();
                        wordOK = true;
                        begin++;
                    } else if (fontSize < wordFontSize) {
                        // Collect every consecutive smaller-font paragraph, stopping
                        // at the next word-sized (or larger) paragraph or at the end
                        // of the section.
                        while (begin < paragraphTotal) {
                            para = hotWord.getParagraph(begin);
                            if (para.getCharacterRun(0).getFontSize() >= wordFontSize) {
                                break;
                            }
                            explaining.append(para.text());
                            begin++;
                        }
                        explainOK = true;
                    } else {
                        // Larger than a word heading: neither word nor explanation.
                        // Step over it; previously the index never advanced here and
                        // the loop could not terminate.
                        begin++;
                    }

                    // Store the pair once both halves are available
                    if (wordOK && explainOK) {
                        MysqlDaoImp.addData(word, explaining.toString());
                        stored++;
                        // Reset everything after the pair has been stored
                        wordOK = false;
                        explainOK = false;
                        word = "";
                        explaining.setLength(0);
                    }
                }

            } catch (FileNotFoundException e) {
                e.printStackTrace();
                System.out.println("文件打开失败。");
            } catch (IOException e) {
                e.printStackTrace();
                System.out.println("IO错误。");
            }

        }
    
       
    ...................................................
  • 相关阅读:
    Jquery+ajax+bootstrap
    Js+Jquery
    css(2)+JS
    css
    mysql 高级
    Git
    Redis
    Nginx
    python爬虫 | 一条高效的学习路径
    拉勾网爬取全国python职位并数据分析薪资,工作经验,学历等信息
  • 原文地址:https://www.cnblogs.com/floakss/p/10572285.html
Copyright © 2011-2022 走看看