zoukankan      html  css  js  c++  java
  • [Java] 数据分析 -- 大数据

    单词计数

    • 需求:输入小说文本,输出每个单词出现的次数
    • 实现:分map、combine、reduce三个阶段实现
      1 /*  Data Analysis with Java
      2  *  John R. Hubbard
      3  *  Aug 4, 2017
      4  */
      5 
      6 package com.hongfeng.Chapter11;
      7 
      8 import java.io.File;
      9 import java.io.IOException;
     10 import java.io.PrintWriter;
     11 import java.util.ArrayList;
     12 import java.util.Collections;
     13 import java.util.HashMap;
     14 import java.util.List;
     15 import java.util.Map;
     16 import java.util.Scanner;
     17 
     /*  Word-count example organized as three file-based phases:
      *  map writes one "word 1" line per word to a temporary file, combine
      *  groups those lines by word in a hash table, and reduce writes one
      *  "word count" line per distinct word to the output file.
      */
     public class Example1 {
         /*  Runs map, combine and reduce over data/sonnets/Sonnet001.txt through
          *  Sonnet080.txt, using data/Temp.dat as the intermediate file and
          *  data/Output.dat as the result. Any IOException is printed to
          *  System.err.
          */
         public static void main(String[] args) {
             try {
                 File tempFile = new File("data/Temp.dat");
                 map("data/sonnets/", 80, tempFile);

                 Map<String,StringBuilder> hashTable = new HashMap<>(2500);
                 combine(tempFile, hashTable);

                 File outFile = new File("data/Output.dat");
                 reduce(hashTable, outFile);
             } catch (IOException e) {
                 System.err.println(e);
             }
         }

         /*  Applies the per-file map step to the n files named
          *  <src>Sonnet001.txt ... <src>Sonnet<n>.txt (3-digit, 1-based) and
          *  writes all resulting (word, 1) pairs to temp.
          */
         public static void map(String src, int n, File temp) throws IOException {
             // try-with-resources closes the writer even if a sonnet file is missing
             try (PrintWriter writer = new PrintWriter(temp)) {
                 for (int i = 0; i < n; i++) {
                     String filename = String.format("%sSonnet%03d.txt", src, i+1);
                     map(filename, writer);
                 }
             }
         }

         /*  Reads the "word 1" lines in temp and, for each word, appends " 1"
          *  to that word's entry in table (creating the entry on first sight).
          *  After the call each value looks like " 1 1 1", one " 1" per
          *  occurrence of the word.
          */
         public static void combine(File temp, Map<String,StringBuilder> table) 
                 throws IOException {
             try (Scanner scanner = new Scanner(temp)) {
                 while (scanner.hasNext()) {
                     String word = scanner.next();
                     StringBuilder value = table.get(word);
                     if (value == null) {
                         value = new StringBuilder();
                         table.put(word, value);
                     }
                     value.append(" 1");
                     // Scan past the rest of the line (a "1"). Guarded so that a
                     // final word with nothing after it does not make nextLine()
                     // throw NoSuchElementException.
                     if (scanner.hasNextLine()) {
                         scanner.nextLine();
                     }
                 }
             }
         }

         /*  Writes one "word count" line to out for every entry in table, in
          *  the table's own iteration order.
          */
         public static void reduce(Map<String,StringBuilder> table, File out) 
                 throws IOException {
             try (PrintWriter writer = new PrintWriter(out)) {
                 for (Map.Entry<String, StringBuilder> entry : table.entrySet()) {
                     String key = entry.getKey();  // e.g., "speak"
                     String value = entry.getValue().toString();  // e.g., " 1 1 1 1 1"
                     reduce(key, value, writer);
                 }
             }
         }


         /*  Writes the pair (word, 1) for each word in the specified file.
          *  Words are the tokens left after splitting on runs of whitespace and
          *  the punctuation . , : ; ( ) ? ! and the double quote; each word is
          *  written in lower case. The writer is left open for the caller.
          */
         public static void map(String filename, PrintWriter writer) 
                 throws IOException {
             try (Scanner input = new Scanner(new File(filename))) {
                 input.useDelimiter("[.,:;()?!\"\\s]+");
                 while (input.hasNext()) {
                     String word = input.next();
                     // Locale.ROOT keeps the lower-casing independent of the
                     // JVM's default locale.
                     writer.printf("%s 1%n", word.toLowerCase(java.util.Locale.ROOT));
                 }
             }
         }

         /*  Counts the 1s in the value argument and writes (key, count) to file.
          *  The count is the number of whitespace-separated tokens in value, so
          *  both "1 1 1 1 1" and " 1 1 1 1 1" give 5 and an empty value gives 0.
          */
         public static void reduce(String key, String value, PrintWriter writer)
                 throws IOException {
             String ones = value.trim();
             int count = ones.isEmpty() ? 0 : ones.split("\\s+").length;
             writer.printf("%s %d%n", key, count);
         }

         /*  Rewrites the given file with its lines in String natural order.
          *  Not called by main.
          */
         private static void sort(File file) throws IOException {
             List<String> list = new ArrayList<>();
             // The scanner is closed before the file is reopened for writing.
             try (Scanner input = new Scanner(file)) {
                 while (input.hasNext()) {
                     list.add(input.nextLine());
                 }
             }
             Collections.sort(list);
             try (PrintWriter output = new PrintWriter(file)) {
                 for (String string : list) {
                     output.println(string);
                 }
             }
         }
     }
    View Code

    参考

    java稀疏矩阵乘法

    https://www.cnblogs.com/a1439775520/p/13074387.html

    mapreduce实现稀疏矩阵乘法

    https://my.oschina.net/ssrs2202/blog/494516?p=1

    https://blog.csdn.net/liuxinghao/article/details/39958957

    hashmap自定义键

    https://blog.csdn.net/weixin_30502965/article/details/95265093

    https://blog.csdn.net/weixin_33881426/article/details/112074005

    https://blog.csdn.net/u011311291/article/details/87873756

    https://blog.csdn.net/Revivedsun/article/details/96225010

  • 相关阅读:
    在线免费学习全世界的课程
    【安卓】imageView.scaleType取centerCrop后,再用padding时显示异常?
    win8+VS2012搭建OpenGL超级宝典的环境
    leetcode第一刷_Plus One
    HASH JION AND NESTED JION
    Hive创建外部表以及分区
    Android数字签名解析(一)
    Alex 的 Hadoop 菜鸟教程: 第3课 Hadoop 安装教程
    用C#生成不反复的随机数
    Swift数据类型及数据类型转换
  • 原文地址:https://www.cnblogs.com/cxc1357/p/14733656.html
Copyright © 2011-2022 走看看