zoukankan      html  css  js  c++  java
  • 统计文本中重复的内容

    1.统计一个文本中重复的内容

    package count;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileReader;
    import java.io.InputStreamReader;
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Map;
    import java.util.Set;
    
    /**
     * Reports which lines of a single text file occur more than once.
     */
    public class countWord {

        public static void main(String[] args) {
            // Backslashes must be escaped inside a Java string literal;
            // "F:\A\B.xml" is rejected by the compiler as an illegal escape.
            count("F:\\A\\B.xml");
        }

        /**
         * Reads the file at {@code filepath} as UTF-8, line by line, and prints
         * every line that appears more than once together with its occurrence
         * count, followed by the number of such distinct lines.
         *
         * <p>Prints {@code "file not exist"} and returns when the path does not
         * exist. Any exception raised while reading is printed as a stack trace
         * and not propagated to the caller.
         *
         * @param filepath path of the text file to analyse
         */
        public static void count(String filepath) {
            try {
                File file = new File(filepath);
                if (!file.exists()) {
                    System.out.println("file not exist");
                    return;
                }

                // line text -> number of times that line was read
                Map<String, Integer> map = new HashMap<String, Integer>();

                // try-with-resources closes the reader (and the wrapped file
                // stream) even when readLine() throws.
                try (BufferedReader bufReader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
                    String line;
                    while ((line = bufReader.readLine()) != null) {
                        Integer seen = map.get(line);
                        map.put(line, seen == null ? 1 : seen + 1);
                    }
                }

                showMap(map);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        /**
         * Prints each entry whose count is greater than 1 as
         * {@code <line>......<count>}, then a summary line with how many such
         * entries were printed. Does nothing when {@code map} is null.
         *
         * @param map line text mapped to its occurrence count
         */
        private static void showMap(Map<String, Integer> map) {
            if (map == null) {
                return;
            }
            int count = 0;
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                // only lines whose count is greater than 1 are duplicates
                if (entry.getValue() > 1) {
                    System.out.println(entry.getKey() + "......" + entry.getValue());
                    count++;
                }
            }
            System.out.println("重复两次的数据:" + count);
        }
    }

    2.统计两个文本中重复的内容

    package count;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileReader;
    import java.io.InputStreamReader;
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Map;
    import java.util.Set;
    
    /**
     * Reports which lines of a second text file also occur in a first one.
     */
    public class countWordTowFile {

        public static void main(String[] args) {
            // Backslashes must be escaped inside a Java string literal;
            // sequences such as "\A" or "\D" are illegal escapes.
            count("F:\\A\\B.xml", "C:\\D\\E.txt");
        }

        /**
         * Reads both files as UTF-8 and prints every distinct line of
         * {@code filepath2} that also occurs in {@code filepath}, followed by
         * the number of such lines. Each common line is printed with the
         * value 2.
         *
         * <p>Prints {@code "file not exist"} and returns when either path does
         * not exist. Any exception raised while reading is printed as a stack
         * trace and not propagated to the caller.
         *
         * @param filepath  path of the first (reference) file
         * @param filepath2 path of the second file, checked against the first
         */
        public static void count(String filepath, String filepath2) {
            try {
                File file = new File(filepath);
                File file2 = new File(filepath2);
                if (!file.exists() || !file2.exists()) {
                    System.out.println("file not exist");
                    return;
                }

                // every distinct line of the first file, each mapped to 1
                Map<String, Integer> map = new HashMap<String, Integer>();
                // lines of the second file that were also found in the first
                Map<String, Integer> map2 = new HashMap<String, Integer>();

                // try-with-resources closes both readers (and their file
                // streams) on every path, including when the second file
                // cannot be opened after the first one already was.
                try (BufferedReader bufReader = new BufferedReader(
                             new InputStreamReader(new FileInputStream(file), "UTF-8"));
                     BufferedReader bufReader2 = new BufferedReader(
                             new InputStreamReader(new FileInputStream(file2), "UTF-8"))) {
                    String line;
                    // read the content of the first file
                    while ((line = bufReader.readLine()) != null) {
                        map.put(line, 1);
                    }
                    String line2;
                    // read the content of the second file
                    while ((line2 = bufReader2.readLine()) != null) {
                        Integer inFirst = map.get(line2);
                        if (inFirst != null) {
                            map2.put(line2, inFirst + 1);
                        }
                    }
                }

                showMap(map2);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        /**
         * Prints every entry as {@code <line>......<value>}, then a summary
         * line with the number of entries. Does nothing when {@code map} is
         * null.
         *
         * @param map common lines mapped to the value to print for them
         */
        private static void showMap(Map<String, Integer> map) {
            if (map == null) {
                return;
            }
            int count = 0;
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                System.out.println(entry.getKey() + "......" + entry.getValue());
                count++;
            }
            System.out.println("重复两次的数据:" + count);
        }
    }
  • 相关阅读:
    整理一批 国内外优秀设计团队 & 设计相关网站
    国内技术团队博客盘点(不只是前端!)
    【技能大赛笔记01】Zigbee点对点按键控制程序开发
    【网络爬虫入门05】分布式文件存储数据库MongoDB的基本操作与爬虫应用
    【网络爬虫入门04】彻底掌握BeautifulSoup的CSS选择器
    【网络爬虫入门03】爬虫解析利器beautifulSoup模块的基本应用
    【网络爬虫入门02】HTTP客户端库Requests的基本原理与基础应用
    【网络爬虫入门01】应用Requests和BeautifulSoup联手打造的第一条网络爬虫
    【Zigbee技术入门教程-02】一图读懂ZStack协议栈的核心思想与工作机理
    【Zigbee技术入门教程-号外】基于Z-Stack协议栈的抢答系统
  • 原文地址:https://www.cnblogs.com/taiguyiba/p/8660207.html
Copyright © 2011-2022 走看看