zoukankan      html  css  js  c++  java
  • 统计文本中重复的内容

    1.统计一个文本中重复的内容

    package count;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileReader;
    import java.io.InputStreamReader;
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Map;
    import java.util.Set;
    
    /**
     * Counts how often each line occurs in a text file and reports the lines
     * that occur more than once.
     */
    public class countWord {

        public static void main(String[] args) {
            // Backslashes have to be escaped inside a Java string literal;
            // a bare "\A" is an illegal escape sequence and does not compile.
            count("F:\\A\\B.xml");
        }

        /**
         * Reads the given file as UTF-8, tallies identical lines, and prints every
         * line that occurs more than once together with its occurrence count,
         * followed by the number of such distinct lines.
         *
         * <p>Prints {@code file not exist} and returns when the path does not
         * exist. Any exception raised while reading is caught here and its stack
         * trace is printed; nothing is propagated to the caller.
         *
         * @param filepath path of the text file to analyse
         */
        public static void count(String filepath) {
            try {
                File file = new File(filepath);
                if (!file.exists()) {
                    System.out.println("file not exist");
                    return;
                }

                // line text -> number of times that exact line was read
                Map<String, Integer> occurrences = new HashMap<String, Integer>();

                // try-with-resources closes the reader (and the underlying file
                // stream) even when readLine() fails part-way through the file.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        Integer seen = occurrences.get(line);
                        occurrences.put(line, seen == null ? 1 : seen + 1);
                    }
                }

                showMap(occurrences);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        /**
         * Prints each key whose count is greater than one as
         * {@code <line>......<count>}, then a summary line with the number of
         * keys printed. Does nothing when {@code map} is {@code null}.
         *
         * @param map line text mapped to its occurrence count
         */
        private static void showMap(Map<String, Integer> map) {
            if (map == null) {
                return;
            }
            int count = 0;
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                // Only lines seen more than once are reported.
                if (entry.getValue() > 1) {
                    System.out.println(entry.getKey() + "......" + entry.getValue());
                    count++;
                }
            }
            System.out.println("重复两次的数据:" + count);
        }
    }

    2.统计两个文本中重复的内容

    package count;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileReader;
    import java.io.InputStreamReader;
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Map;
    import java.util.Set;
    
    /**
     * Reports the lines that two text files have in common.
     */
    public class countWordTowFile {

        public static void main(String[] args) {
            // Backslashes have to be escaped inside a Java string literal;
            // bare "\A" or "\D" are illegal escape sequences and do not compile.
            count("F:\\A\\B.xml", "C:\\D\\E.txt");
        }

        /**
         * Reads both files as UTF-8 and prints every line of the second file that
         * also appears in the first file, followed by the number of such distinct
         * lines.
         *
         * <p>Each shared line is printed with the value {@code 2} (one for each
         * file); the value is not a real occurrence count, so repeats inside
         * either file do not change it.
         *
         * <p>Prints {@code file not exist} and returns when either path does not
         * exist. Any exception raised while reading is caught here and its stack
         * trace is printed; nothing is propagated to the caller.
         *
         * @param filepath  path of the first text file
         * @param filepath2 path of the second text file
         */
        public static void count(String filepath, String filepath2) {
            try {
                File file = new File(filepath);
                File file2 = new File(filepath2);
                if (!file.exists() || !file2.exists()) {
                    System.out.println("file not exist");
                    return;
                }

                // Lines of the first file, each mapped to the marker value 1.
                Map<String, Integer> firstFileLines = new HashMap<String, Integer>();
                // Lines found in both files, mapped to marker + 1 (always 2).
                Map<String, Integer> sharedLines = new HashMap<String, Integer>();

                // A single try-with-resources closes both readers on every path,
                // including the case where opening the second file fails after
                // the first one is already open.
                try (BufferedReader reader = new BufferedReader(
                             new InputStreamReader(new FileInputStream(file), "UTF-8"));
                     BufferedReader reader2 = new BufferedReader(
                             new InputStreamReader(new FileInputStream(file2), "UTF-8"))) {

                    String line;
                    while ((line = reader.readLine()) != null) {
                        firstFileLines.put(line, 1);
                    }

                    String line2;
                    while ((line2 = reader2.readLine()) != null) {
                        Integer marker = firstFileLines.get(line2);
                        if (marker != null) {
                            sharedLines.put(line2, marker + 1);
                        }
                    }
                }

                showMap(sharedLines);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        /**
         * Prints every entry as {@code <line>......<value>}, then a summary line
         * with the number of entries. Does nothing when {@code map} is
         * {@code null}.
         *
         * @param map shared line text mapped to the value to print
         */
        private static void showMap(Map<String, Integer> map) {
            if (map == null) {
                return;
            }
            int count = 0;
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                System.out.println(entry.getKey() + "......" + entry.getValue());
                count++;
            }
            System.out.println("重复两次的数据:" + count);
        }
    }
  • 相关阅读:
    SSH框架中使用注解和xml配置的区别
    web项目中log4j的配置
    嵌入式—ASCII码
    MATLAB
    MATLAB
    MATLAB
    MATLAB
    CentOS 7将网卡名称eno16777736改为eth0
    图像增强处理
    Debussy与modelsim联仿时 do 文件脚本
  • 原文地址:https://www.cnblogs.com/taiguyiba/p/8660207.html
Copyright © 2011-2022 走看看