zoukankan      html  css  js  c++  java
  • 给定a、b两个文件,各存放50亿个url,每个url各占64字节,内存限制是4G,让你找出a、b文件共同的url?

    package com.hadoop.hdfs;
    
    import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
    import org.junit.Test;
    
    import java.io.*;
    import java.util.HashMap;
    import java.util.HashSet;
    
    /**
     * Finds the URLs that appear in both {@code D:/aa.txt} and {@code D:/bb.txt}
     * (one URL per line) without loading either file into memory as a whole.
     *
     * <p>Both inputs are split into {@value #BUCKET_COUNT} bucket files using the
     * same hash function. Identical URLs always hash to the same bucket index, so
     * only buckets with the same index have to be compared with each other, and
     * only one bucket of the first file has to be held in memory at a time.
     */
    public class Suanfa1 {
        /** Number of bucket files each input file is split into. */
        private static final int BUCKET_COUNT = 1000;

        /**
         * Splits {@code D:/aa.txt} into the bucket files {@code D://aa<i>.txt}.
         *
         * @throws IOException if the input cannot be read or a bucket cannot be written
         */
        @Test
        public void a1() throws IOException {
            partition("D:/aa.txt", "D://aa");
        }

        /**
         * Splits {@code D:/bb.txt} into the bucket files {@code D://bb<i>.txt}.
         *
         * @throws IOException if the input cannot be read or a bucket cannot be written
         */
        public void a2() throws IOException {
            partition("D:/bb.txt", "D://bb");
        }

        /**
         * Computes a 64-bit polynomial hash (base 31) over the characters of {@code str}.
         *
         * <p>The arithmetic wraps around on overflow, so the result may be negative;
         * callers that need an index must normalise it themselves.
         *
         * @param str the string to hash; must not be {@code null}
         * @return the hash value, {@code 0} for the empty string
         */
        public long hashCode(String str) {
            long h = 0;
            for (int i = 0; i < str.length(); i++) {
                h = 31 * h + str.charAt(i);
            }
            return h;
        }

        /**
         * Partitions both input files and prints every URL they have in common to
         * standard output, each common URL once.
         *
         * @throws IOException if any input or bucket file cannot be read or written
         */
        @Test
        public void a3() throws IOException {
            a1();
            a2();
            for (int i = 0; i < BUCKET_COUNT; i++) {
                // Keep the URLs themselves, not their hashes: two different URLs
                // can share a hash and would otherwise be reported as common.
                HashSet<String> urls = new HashSet<>();
                try (BufferedReader readerA =
                        new BufferedReader(new FileReader("D://aa" + String.valueOf(i) + ".txt"))) {
                    String line;
                    while ((line = readerA.readLine()) != null) {
                        urls.add(line);
                    }
                }

                try (BufferedReader readerB =
                        new BufferedReader(new FileReader("D://bb" + String.valueOf(i) + ".txt"))) {
                    String line;
                    while ((line = readerB.readLine()) != null) {
                        // remove() instead of contains(): a URL repeated in the
                        // second file is reported only the first time it matches.
                        if (urls.remove(line)) {
                            System.out.println(line);
                        }
                    }
                }
            }
        }

        /**
         * Distributes the lines of {@code inputPath} over {@value #BUCKET_COUNT}
         * files named {@code bucketPrefix + index + ".txt"}.
         *
         * <p>Every bucket file is created (or truncated) up front and kept open for
         * the whole pass, so buckets that receive no line still exist afterwards and
         * earlier lines are not overwritten by later ones.
         */
        private void partition(String inputPath, String bucketPrefix) throws IOException {
            BufferedWriter[] writers = new BufferedWriter[BUCKET_COUNT];
            IOException primary = null;
            try (BufferedReader reader = new BufferedReader(new FileReader(inputPath))) {
                for (int i = 0; i < BUCKET_COUNT; i++) {
                    writers[i] = new BufferedWriter(
                            new FileWriter(bucketPrefix + String.valueOf(i) + ".txt"));
                }
                String line;
                while ((line = reader.readLine()) != null) {
                    BufferedWriter target = writers[bucketOf(line)];
                    target.write(line);
                    target.newLine();
                }
            } catch (IOException e) {
                primary = e;
                throw e;
            } finally {
                closeAll(writers, primary);
            }
        }

        /** Maps a line to a bucket index in {@code [0, BUCKET_COUNT)}, even for negative hashes. */
        private int bucketOf(String line) {
            long remainder = hashCode(line) % BUCKET_COUNT;
            return (int) ((remainder + BUCKET_COUNT) % BUCKET_COUNT);
        }

        /**
         * Closes every non-null writer. Close failures are attached to {@code primary}
         * as suppressed exceptions when one is already in flight, otherwise the first
         * close failure is thrown with the remaining ones suppressed.
         */
        private void closeAll(BufferedWriter[] writers, IOException primary) throws IOException {
            IOException failure = null;
            for (BufferedWriter writer : writers) {
                if (writer == null) {
                    continue;
                }
                try {
                    writer.close();
                } catch (IOException e) {
                    if (failure == null) {
                        failure = e;
                    } else {
                        failure.addSuppressed(e);
                    }
                }
            }
            if (failure == null) {
                return;
            }
            if (primary != null) {
                primary.addSuppressed(failure);
            } else {
                throw failure;
            }
        }
    }
    我凝视这恒星,等待着那场风暴,我已经准备好了
  • 相关阅读:
    简单批处理内部命令简介(转)
    CPU 内存 频率 DDR DDR2 DDR3
    python 正则表达式
    bat 脚本 > >> 管道
    python 多进程 无数进程 重复进程 死机
    NLP相关期刊和会议
    deamon tools dtsoft virtual cdrom device 失败 错误
    占位
    2011年07月03日的日记
    每周总结(第二周)
  • 原文地址:https://www.cnblogs.com/cheng5350/p/11740754.html
Copyright © 2011-2022 走看看