zoukankan      html  css  js  c++  java
  • ForkJoin统计文件夹中包含关键词的数量

    package com.oxygen.forkjoin.model;
    
    import java.util.List;
    /**
     * A document, represented in memory as the list of its text lines.
     *
     * @author renguanyu
     */
    public class Document {

        /** The lines of the document, in the order they were supplied. */
        private List<String> lines;

        /**
         * Creates a document backed by the given list. The list is stored as-is
         * (no defensive copy is made).
         *
         * @param lines the lines of the document
         */
        public Document(List<String> lines) {
            this.lines = lines;
        }

        /**
         * Replaces the lines of this document. The list is stored as-is.
         *
         * @param lines the new lines of the document
         */
        public void setLines(List<String> lines) {
            this.lines = lines;
        }

        /**
         * Returns the list currently held by this document (the same instance
         * that was passed in, not a copy).
         *
         * @return the lines of the document
         */
        public List<String> getLines() {
            return this.lines;
        }

    }
    package com.oxygen.forkjoin.model;
    
    import java.util.List;
    /**
     * A folder, represented in memory as its direct sub-folders plus the
     * documents it directly contains.
     *
     * @author renguanyu
     */
    public class Folder {

        /** Folders nested directly inside this folder. */
        private List<Folder> subFolders;

        /** Documents located directly inside this folder. */
        private List<Document> documents;

        /**
         * Creates a folder from its children. Both lists are stored as-is
         * (no defensive copies are made).
         *
         * @param subFolders the folders nested directly inside this one
         * @param documents  the documents located directly inside this one
         */
        public Folder(List<Folder> subFolders, List<Document> documents) {
            this.documents = documents;
            this.subFolders = subFolders;
        }

        /**
         * @return the documents located directly inside this folder
         */
        public List<Document> getDocuments() {
            return this.documents;
        }

        /**
         * @param documents the new list of documents for this folder
         */
        public void setDocuments(List<Document> documents) {
            this.documents = documents;
        }

        /**
         * @return the folders nested directly inside this folder
         */
        public List<Folder> getSubFolders() {
            return this.subFolders;
        }

        /**
         * @param subFolders the new list of sub-folders for this folder
         */
        public void setSubFolders(List<Folder> subFolders) {
            this.subFolders = subFolders;
        }

    }
    package com.oxygen.forkjoin.service;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import com.oxygen.forkjoin.model.Document;
    /**
     * Document service: loads {@link Document} instances from files.
     *
     * @author renguanyu
     */
    public class DocumentService {

        /**
         * Reads all lines of a file into a {@link Document}.
         *
         * <p>The file is decoded with the default character encoding used by
         * {@link FileReader#FileReader(File)}. I/O failures (including a missing
         * file) are not propagated: the stack trace is printed and a document
         * holding whatever lines were read before the failure is returned.
         *
         * @param file the file to read
         * @return a document containing the lines that were read
         */
        public static Document fromFile(File file) {
            List<String> content = new ArrayList<>();
            try (BufferedReader in = new BufferedReader(new FileReader(file))) {
                for (String current = in.readLine(); current != null; current = in.readLine()) {
                    content.add(current);
                }
            } catch (IOException e) {
                // FileNotFoundException is an IOException, so this single handler
                // covers both failure modes the same way.
                e.printStackTrace();
            }
            return new Document(content);
        }

    }
    package com.oxygen.forkjoin.service;
    
    import java.io.File;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.ForkJoinPool;
    
    import com.oxygen.forkjoin.model.Document;
    import com.oxygen.forkjoin.model.Folder;
    import com.oxygen.forkjoin.task.FolderSearchTask;
    /**
     * Folder service: loads a directory tree into memory and counts keyword
     * occurrences in it with the fork/join framework.
     *
     * @author renguanyu
     */
    public class FolderService{

        /**
         * Recursively loads a directory into an in-memory {@link Folder}.
         *
         * <p>Every sub-directory becomes a nested {@link Folder}; every other
         * entry is read through {@link DocumentService#fromFile(File)}. All file
         * contents are loaded eagerly so the search can run purely in memory.
         *
         * <p>If the directory cannot be listed ({@link File#listFiles()} returns
         * {@code null}, e.g. the path is not a directory or an I/O error
         * occurred), it is treated as empty instead of failing.
         *
         * @param dir the directory to load
         * @return the in-memory representation of {@code dir}
         */
        public static Folder fromDirectory(File dir) {
            List<Document> documents = new ArrayList<>();
            List<Folder> subFolders = new ArrayList<>();
            File[] entries = dir.listFiles();
            // listFiles() returns null rather than an empty array on failure.
            if (entries != null) {
                for (File entry : entries) {
                    if (entry.isDirectory()) {
                        subFolders.add(FolderService.fromDirectory(entry));
                    } else {
                        documents.add(DocumentService.fromFile(entry));
                    }
                }
            }
            return new Folder(subFolders, documents);
        }

        /**
         * Counts the occurrences of a keyword in all files under a directory.
         *
         * @param targetFolder path of the directory to search
         * @param keyword the keyword to count
         * @return the total number of occurrences found
         * @throws IllegalArgumentException if {@code targetFolder} does not
         *         denote an existing directory
         */
        public static long getKeywordTotal(String targetFolder, String keyword) {
            File dir = new File(targetFolder);
            if (!dir.isDirectory()) {
                throw new IllegalArgumentException("Not a directory: " + targetFolder);
            }
            // Load the whole directory tree into memory first.
            Folder folder = FolderService.fromDirectory(dir);
            // Create the pool only once there is work for it.
            ForkJoinPool forkJoinPool = new ForkJoinPool();
            try {
                // Run the root search task and wait for its result.
                return forkJoinPool.invoke(new FolderSearchTask(folder, keyword));
            } finally {
                // Release the worker threads of this private pool.
                forkJoinPool.shutdown();
            }
        }

    }
    package com.oxygen.forkjoin.task;
    import java.util.List;
    import java.util.concurrent.RecursiveTask;
    
    import com.oxygen.forkjoin.model.Document;
    /**
     * 文档搜索任务
     * @author renguanyu
     *
     */
    public class DocumentSearchTask extends RecursiveTask<Long> {
        
        private static final long serialVersionUID = 1L;
        
        private Document document;
        private String searchedWord;
        
        public DocumentSearchTask(Document document, String searchedWord) {
            super();
            this.document = document;
            this.searchedWord = searchedWord;
        }
        
        @Override
        protected Long compute() {
            long count = 0;
            List<String> lines = document.getLines();
            for (String line : lines) {
                String[] words = line.trim().split("(\s|\p{Punct})+");
                for (String word : words) {
                    if (searchedWord.equals(word)) {
                        count = count + 1;
                    }
                }
            }
            return count;
        }
        
    }
    package com.oxygen.forkjoin.task;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.RecursiveTask;
    
    import com.oxygen.forkjoin.model.Document;
    import com.oxygen.forkjoin.model.Folder;
    /**
     * Fork/join task that counts the occurrences of a word in a {@link Folder},
     * including all of its sub-folders.
     *
     * @author renguanyu
     */
    public class FolderSearchTask extends RecursiveTask<Long> {

        private static final long serialVersionUID = 1L;

        private Folder folder;
        private String searchedWord;

        /**
         * @param folder the folder to search
         * @param searchedWord the word to count
         */
        public FolderSearchTask(Folder folder, String searchedWord) {
            super();
            this.folder = folder;
            this.searchedWord = searchedWord;
        }

        /**
         * Creates one subtask per sub-folder and per document, runs them all and
         * sums their results.
         *
         * @return the number of occurrences found in this folder and below
         */
        @Override
        protected Long compute() {
            List<RecursiveTask<Long>> subTasks = new ArrayList<>();
            // One recursive folder task per sub-folder.
            for (Folder subFolder : folder.getSubFolders()) {
                subTasks.add(new FolderSearchTask(subFolder, searchedWord));
            }
            // One document task per document directly in this folder.
            for (Document document : folder.getDocuments()) {
                subTasks.add(new DocumentSearchTask(document, searchedWord));
            }
            // invokeAll handles forking and joining, instead of forking every
            // subtask by hand and then joining them in fork order.
            long count = 0L;
            for (RecursiveTask<Long> task : invokeAll(subTasks)) {
                // Every task is complete here, so join() only fetches the result.
                count += task.join();
            }
            return count;
        }
    }
    package com.oxygen.forkjoin.test;
    
    import java.io.IOException;
    import com.oxygen.forkjoin.service.FolderService;
    /**
     * 测试程序
     * @author renguanyu
     *
     */
    public class MainTest {
    
        public static void main(String[] args) throws IOException {
    
            long startTime = System.currentTimeMillis();
    
            long counts = FolderService.getKeywordTotal("C:\test\logs\", "null");
    
            long stopTime = System.currentTimeMillis();
    
            long completeTime = stopTime - startTime;
    
            System.out.println(counts + " , fork / join search took " + completeTime + "ms");
        }
    }

     

  • 相关阅读:
    房地产行业的商业智能BusinessIntelligence介绍
    企业混搭应用介绍
    ElasticSearch+NLog+Elmah实现Asp.Net分布式日志管理
    如何寻找“真爱”型合伙人
    微信转发朋友圈小视频就这么简单
    微信 6.5.1 for iOS发布 可以在朋友圈分享相册中的视频
    微信养号教程预防封号
    首场微信小论坛上他们都聊了哪些小程序的议题
    搜狗微信搜索增加平均阅读数和发文数
    微信小程序想要的是无法监测的流量dark social
  • 原文地址:https://www.cnblogs.com/renguanyu/p/9158915.html
Copyright © 2011-2022 走看看