zoukankan      html  css  js  c++  java
  • Apache POI使用

    使用 Apache POI 解析 Excel 文件:

    package excellucene;
    
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileWriter;
    import java.io.FilenameFilter;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.PrintWriter;
    
    import org.apache.poi.EncryptedDocumentException;
    import org.apache.poi.hssf.extractor.ExcelExtractor;
    import org.apache.poi.hssf.usermodel.HSSFWorkbook;
    import org.apache.poi.ss.usermodel.Cell;
    import org.apache.poi.ss.usermodel.Row;
    import org.apache.poi.ss.usermodel.Sheet;
    import org.apache.poi.ss.usermodel.Workbook;
    import org.apache.poi.ss.usermodel.WorkbookFactory;
    
    import com.sun.media.sound.InvalidFormatException;
    
    public class ParseExcel {
    
        public static void main(String[] args) throws IOException {
    
            String path = "C:\Users\Desktop\a01hos\img";
            File f = new File(path);
            File[] files = f.listFiles();
            System.out.println(files.length);
    
            File[] filesxls = f.listFiles(new FilenameFilter() {
    
                public boolean accept(File dir, String name) {
                    if (name.endsWith(".xls") || name.endsWith(".xlsx")) {
                        return true;
                    }
                    return false;
                }
            });
            System.out.println("Excel文件有: " + filesxls.length);
    
            for (File f2 : filesxls) {
                String fileDirectPathName = f2.getCanonicalPath();
                System.out.println(fileDirectPathName);
                // System.out.println("文件名: " + f2.getName());
    
                new ParseExcel().parseXml(fileDirectPathName);
            }
    
            /*
             * IndexWriter writer; // 创建 Lucene Index Writer Directory dir =
             * FSDirectory.open(Paths.get("f:/excelindex")); writer = new
             * IndexWriter(dir, new IndexWriterConfig( new StandardAnalyzer()));
             * 
             * for (File f2 : filesxls) { // FileReader fr = new FileReader(f); //
             * BufferedReader br = new BufferedReader(fr);
             * System.out.println(f2.getCanonicalPath()); System.out.println("文件名: "
             * + f2.getName());
             * 
             * 
             * // 创建dom对象创建索引 创建索引 Document document = new Document();
             * 
             * Document doc = new Document(); doc.add(new Field("contents",
             * ExcelFileReader(f2.getCanonicalPath()), TextField.TYPE_NOT_STORED));
             * doc.add(new Field("filename", f2.getName(), TextField.TYPE_STORED));
             * doc.add(new StringField("fullpath", f2.getCanonicalPath(),
             * Field.Store.YES));
             * 
             * writer.addDocument(doc);
             * 
             * writer.numDocs();
             * 
             * }
             */
        }
    
        /**
         * Excel表格提取数据
         * 
         * @param fileName
         *            路径
         * @return
         * @throws IOException
         */
        public static String ExcelFileReader(String fileName) throws IOException {
            InputStream path = new FileInputStream(fileName);
            String content = null;
            // 1、创建新的Excel文件
            HSSFWorkbook wb = new HSSFWorkbook(path);
            ExcelExtractor extractor = new ExcelExtractor(wb);
            extractor.setFormulasNotResults(true);
            extractor.setIncludeSheetNames(false);
            content = extractor.getText();
            return content;
        }
    
        public void parseXml(String filename) {
            Workbook wb = null;
            try {
                wb = WorkbookFactory.create(new File(filename));
                
                
                
                
                
                
                Sheet sheet = wb.getSheetAt(0);
    
                for (Row row : sheet) {
    
                    for (Cell cell : row) {
                        System.out.print(getCellValue(cell) + "---");
                        save(getCellValue(cell) + "---");
                    }
                    System.out.println();
                }
            } catch (EncryptedDocumentException e) {
                e.printStackTrace();
            } catch (InvalidFormatException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } catch (org.apache.poi.openxml4j.exceptions.InvalidFormatException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    
        public Object getCellValue(Cell cell) {
            int type = cell.getCellType();
            String show = null;
            switch (type) {
            case Cell.CELL_TYPE_BLANK:// 空值
                show = null;
                break;
            case Cell.CELL_TYPE_BOOLEAN:// Boolean
                show = String.valueOf(cell.getBooleanCellValue());
                break;
            case Cell.CELL_TYPE_ERROR:// 故障
                show = String.valueOf(cell.getErrorCellValue());
                break;
            case Cell.CELL_TYPE_FORMULA:// 公式
                show = cell.getCellFormula();
                break;
            case Cell.CELL_TYPE_NUMERIC:// 数字
                show = String.valueOf(cell.getNumericCellValue());
                break;
            case Cell.CELL_TYPE_STRING:// 字符串
                show = cell.getStringCellValue();
                break;
            default:
                show = null;
            }
            return show;
        }
        
        /**
         * 保存字符串到文本中
         * 
         * @param str
         */
        public boolean save(String str) {
            boolean flag = false; // 声明操作标记
    
            String fileName = "file/haha.txt"; // 定义文件名
            
            File f = new File(fileName);
            
            if(!f.exists()){
                try {
                    f.createNewFile();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            
            FileWriter fw = null; // 用来写入字符文件的便捷类
            PrintWriter out = null; // 向文本输出流打印对象的格式化表示形式类
    
            try {
                fw = new FileWriter(f, true); // 创建一个FileWriter
                out = new PrintWriter(fw); // 创建一个PrintWriter,以追加方式将内容插入到最后一行
                out.println(str); // 将字符串打印到文本中
                out.flush(); // 刷新缓存
    
                flag = true;
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    // 关闭PrintWriter
                    if (out != null) {
                        out.close();
                        out = null;
                    }
                    // 关闭FileWriter
                    if (fw != null) {
                        fw.close();
                        fw = null;
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            return flag;
        }
    
    }

    使用 Lucene 检索已建立的索引:

    package excellucene;
    
    import java.io.IOException;
    import java.nio.file.Paths;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.FSDirectory;
    
    public class SearchExcel {
    
        public static void main(String[] args) throws IOException, ParseException {
            if(args.length!=2){
                throw new IllegalArgumentException(SearchExcel.class.getName()+" <> <query>");
                
            }
            
    //        String indexDir = args[0];//解析输入的索引路径
    //        String q = args[1];//解析输入的查询字符串
            
            String indexDir = "F:\excelindex";
            String q = "zhangxing";
            
            search(indexDir, q);
            
        }
        
        public static void search(String indexDir, String q) throws IOException, ParseException{
    //        Directory dir = FSDirectory.open(Paths.get(indexDir));
            
            IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir)));
            
            IndexSearcher is = new IndexSearcher(reader);
            
            Analyzer analyzer = new StandardAnalyzer();
            
            // 需要添加  .jar 包
    //        lucene-queryparser-7.4.0.jar
            
            QueryParser parser = new QueryParser("filename", analyzer);
            
            Query query = parser.parse(q);
            
            long start = System.currentTimeMillis();
            TopDocs hits = is.search(query, 10);
            long end = System.currentTimeMillis();
            
            System.err.println("Found "+hits.totalHits+" document(s) (in "+ (end-start) +" milliseconds) that matched query'"+q+"':");
            
            for(ScoreDoc scoreDoc:hits.scoreDocs){
                Document doc = is.doc(scoreDoc.doc);
                System.out.println(doc.get("fullpath"));
                
            }
        }
    }

     使用了的jar包:

  • 相关阅读:
    App架构师实践指南四之性能优化一
    App架构师实践指南三之基础组件
    App架构师实践指南二之App开发工具
    App架构师实践指南一之App基础语法
    Linux下阅读MHT文件
    What Is Docker & Docker Container ? A Deep Dive Into Docker !
    Difference between Docker Image and Container?
    RabbitMQ .NET/C# Client API Guide
    How RabbitMQ Works and RabbitMQ Core Concepts
    Message Queue vs Message Bus — what are the differences?
  • 原文地址:https://www.cnblogs.com/moonsoft/p/10620745.html
Copyright © 2011-2022 走看看