zoukankan      html  css  js  c++  java
  • Spark API编程动手实战-02-以集群模式进行Spark API实战textFile、cache、count

    操作HDFS之前,先要保证HDFS已经启动:


    启动spark集群:


    以spark-shell运行在spark集群上:



    查看下之前上传到HDFS上的“LICENSE.txt”文件:


    用spark读取这个文件:


    使用count统计该文件的行数:


    我们可以看到count耗时为0.239708s

    对该RDD进行cache操作并执行count使得缓存生效:


    执行count结果为:


    此时耗时为0.21132s

    再执行count操作:


    此时耗时为0.029580s,这是因为我们是直接基于cache后的数据进行操作的。

    接着我们对上面的rdd进行wordcount操作:



    通过saveAsTextFile把数据存到HDFS中:


    我们通过web控制台查看下运行结果:


    我们通过命令行看下part-00000和part-00001内容:

    [spark@S1PA222 ~]$ hadoop fs -cat /data/resultLicenseWordCount/part-00000
    15/01/22 13:51:32 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    (under,10)
    (Unless,3)
    (Contributions),1)
    (offer,1)
    (agree,1)
    (BUSINESS,2)
    (NON-INFRINGEMENT,,1)
    (its,4)
    (materials,2)
    (event,1)
    (intentionally,2)
    (Grant,2)
    (writing,1)
    (include,3)
    (responsibility,,1)
    (have,2)
    (MERCHANTABILITY,,1)
    (Contribution,3)
    (Massachusetts,1)
    (express,2)
    ("Your"),1)
    ((i),1)
    (However,,1)
    (been,2)
    (files;,1)
    (This,1)
    (stating,1)
    (2-Clause,1)
    (conditions.,1)
    (non-exclusive,,2)
    (appropriateness,1)
    (marked,1)
    (risks,1)
    (any,28)
    (IS",4)
    (implementation,1)
    (filed.,1)
    (Sections,1)
    (fee,1)
    (losses),,1)
    (out,1)
    (contract,2)
    (DISTRIBUTION,1)
    (4.,1)
    (file,6)
    (documentation,,2)
    (wherever,1)
    (unless,1)
    (below).,1)
    (names,,1)
    (verbal,,1)
    (ANY,10)
    (version,1)
    (file.,2)
    (are,10)
    (no-charge,,2)
    (2.,1)
    (from,,1)
    (reproduction,,3)
    (2011-2014,,1)
    (assume,1)
    (licenses,1)
    (DATA,,2)
    (IS,2)
    (recommend,1)
    (prominent,1)
    (revisions,,1)
    ("[]",1)
    (FITNESS,3)
    (otherwise,,3)
    (distribution,,1)
    (necessarily,1)
    (Apache,5)
    (grant,1)
    (CONTRIBUTORS,4)
    (as,15)
    (irrevocable,2)
    (inclusion,2)
    (purpose,2)
    (products,1)
    (ARE,2)
    (merely,1)
    (File,1)
    (Definitions.,1)
    (form,10)
    (IMPLIED,4)
    (Warranty,1)
    (Patent,1)
    (incurred,1)
    (8.,1)
    (repository,1)
    (contributors,1)
    ("printed,1)
    (sell,,2)
    (:,3)
    (malfunction,,1)
    (Version,2)
    (origin,1)
    (alongside,1)
    (CRC,1)
    (implied.,1)
    (contract,,1)
    (representatives,,1)
    (warranty,1)
    (offer,,1)
    (org.apache.hadoop.util.bloom.*,1)
    (KIND,,2)
    (is,10)
    (conspicuously,1)
    (found,1)
    (charge,1)
    (make,,1)
    (file,,1)
    (associated,1)
    (even,1)
    (same,1)
    ((Don't,1)
    (outstanding,1)
    (link,1)
    ([name,1)
    (Trademarks.,1)
    (notice,2)
    (endorse,1)
    (shall,15)
    (contact,1)
    (Redistributions,4)
    (using,1)
    (class,1)
    (name),1)
    (behalf,5)
    (form.,1)
    (We,1)
    (INTERRUPTION),2)
    (responsible,1)
    (annotations,,1)
    (THIS,4)
    (subject,1)
    (acting,1)
    (permitted,2)
    (OUT,2)
    (BASIS,,2)
    (has,2)
    (Accepting,1)
    (defend,,1)
    (University,1)
    ([yyyy],1)
    ((http://www.one-lab.org),1)
    (EVENT,2)
    (granting,1)
    (portions,1)
    (implied,,1)
    (NOTICE,5)
    (infringed,1)
    (limitation,,1)
    (names,2)
    (electronic,,1)
    (PURPOSE,2)
    (licensable,1)
    (section),1)
    (conditions,14)
    (EVEN,2)
    (acts),1)
    (law,3)
    (licenses.,1)
    (compression,1)
    (readable,1)
    (solely,1)
    (configuration,1)
    (information.,1)
    (litigation,2)
    (represent,,1)
    (warranty,,1)
    (shares,,1)
    (supersede,1)
    (governed,1)
    (marks,,1)
    (http://code.google.com/p/lz4/,1)
    (modification,,2)
    (fifty,1)
    (sent,1)
    (places:,1)
    (means,2)
    (identifying,1)
    (this,22)
    (Works",1)
    (Louvain,1)
    (prior,1)
    (slicing-by-8,1)
    (PROCUREMENT,2)
    (changed,1)
    (describing,1)
    (only,4)
    (contributory,1)
    (normally,1)
    (indirect,,2)
    (WITHOUT,2)
    (Works,12)
    (documentation,3)
    (agreement,1)
    (otherwise,3)
    ("AS,4)
    (damages,,1)
    (patent,,1)
    (APACHE,1)
    (without,6)
    ("NOTICE",1)
    (Limitation,1)
    (SUBSTITUTE,2)
    (Contribution(s),3)
    (Subject,2)
    (Submission,1)
    (UCL,1)
    (TITLE,,1)
    (trademarks,,1)
    ((iii),1)
    (2.0,1)
    (Fast,1)
    (exercise,1)
    (accepting,2)
    (example,1)
    (distribution.,2)
    (interfaces,1)
    (conditions:,1)
    (act,1)
    (incorporated,2)
    (provides,2)
    (limited,4)
    (LZ4,3)
    (2008,2009,2010,1)
    (can,2)
    (contents,1)
    (PURPOSE.,1)
    (recipients,1)
    ("Contribution",1)
    (failure,1)
    (communication,3)
    (commercial,1)
    (works,1)
    (language,1)
    (permissions,3)
    (WARRANTIES,4)
    (media,1)
    (reserved.,2)
    (Works,,2)
    (How,1)
    (WARRANTIES,,2)
    (controlled,1)
    (Warranty.,1)
    (2.0,,1)
    ((http://www.opensource.org/licenses/bsd-license.php),1)
    (own,4)
    (submit,1)
    (SHALL,2)
    (reasonable,1)
    (reason,1)
    (agreed,3)
    (systems,1)
    (patent,5)
    (form,,4)
    (Technology.,1)
    (advised,1)
    (systems,,1)
    (classes:,1)
    (HOWEVER,2)
    (distribution,3)
    (DAMAGES,2)
    ((c),2)
    (src/main/native/src/org/apache/hadoop/util:,1)
    (PROFITS;,2)
    (perpetual,,2)
    (applies,1)
    (apply,2)
    (subcomponents,2)
    (modify,2)
    (owner],1)
    (one,1)
    (modifying,1)
    (counterclaim,1)
    (January,1)
    (discussing,1)
    (CONTRACT,,2)
    (with,16)
    ((C),1)
    (infringement,,1)
    (2004,1)
    (lawsuit),1)
    (specific,2)
    (LZ,1)
    (warranties,1)
    (reproducing,1)
    (promote,1)
    (beneficial,1)
    (ADVISED,2)
    ((a),1)
    (other,9)
    (date,1)
    (met:,2)
    (publicly,2)
    (from,4)
    (LIMITED,4)
    (display,,1)
    (MERCHANTABILITY,2)
    (damages,3)
    (SUBCOMPONENTS:,1)
    (negligence),,1)
    (remain,1)
    (CONDITIONS,4)
    (their,2)
    (electronic,1)
    (identification,1)
    (determining,1)
    (consistent,1)
    (display,1)
    (writing,,3)
    (trade,1)
    (third-party,2)
    (,1299)
    (description,1)
    (REPRODUCTION,,1)
    (attached,1)
    (list,4)
    (*,34)
    (INDIRECT,,2)
    (designated,1)
    (Contribution.",1)
    (complies,1)
    (addendum,1)
    (damages.,1)
    (Yann,1)
    (EXPRESS,2)
    (License;,1)
    (6.,1)
    (GOODS,2)
    (subsequently,1)
    (included,2)
    (replaced,1)
    (notice,,5)
    [spark@S1PA222 ~]$   hadoop fs -cat /data/resultLicenseWordCount/part-00001

    15/01/22 13:52:29 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    (For,6)
    (reproduce,,1)
    ("Contributor",1)
    ((or,3)
    (nothing,1)
    (work.,1)
    (content,1)
    (HOLDERS,2)
    (add,2)
    (through,1)
    (All,2)
    (perform,,1)
    (result,1)
    (goodwill,,1)
    (herein,1)
    (direct,,1)
    (used,1)
    (To,1)
    (harmless,1)
    (9.,1)
    (these,1)
    (control,,1)
    (INCIDENTAL,,2)
    (indicated,1)
    (part,4)
    (alone,1)
    (different,1)
    (forms,,2)
    (purposes,4)
    (https://groups.google.com/forum/#!forum/lz4c,1)
    (be,7)
    (/**,2)
    (carry,1)
    (separable,1)
    (including,5)
    (contained,1)
    (combination,1)
    (calculation,1)
    (license,7)
    (FOR,6)
    (thereof,,2)
    (ARISING,2)
    (constitutes,1)
    (but,5)
    (types.,1)
    (stated,2)
    (archives.,1)
    (obligations,,1)
    (5.,1)
    (Works;,3)
    (nor,1)
    ("Legal,1)
    (Work,20)
    (whole,,2)
    (Copyright,5)
    (at,3)
    (copyright,,1)
    (Redistribution,2)
    (object,1)
    (copy,3)
    (indemnify,,1)
    (asserted,1)
    (HADOOP,1)
    (attach,1)
    ("control",1)
    (support,,1)
    ("Object",1)
    (give,1)
    (THEORY,2)
    (may,10)
    (except,2)
    ("Work",1)
    (sublicense,,1)
    (IF,2)
    (granted,2)
    (project,2)
    (authorized,2)
    (SPECIAL,,2)
    (BY,2)
    (retain,2)
    (or,65)
    (transfer,1)
    (fields,1)
    (Licensor,,1)
    ((b),1)
    ((ii),1)
    (2005,,1)
    (of,75)
    (does,1)
    (transformation,1)
    ((INCLUDING,2)
    (DIRECT,,2)
    (management,1)
    (modified,1)
    (Licensed,1)
    (percent,1)
    (Header,1)
    (original,2)
    (Contributor,,1)
    (native,1)
    ((INCLUDING,,2)
    (PARTICULAR,3)
    (limitations,1)
    (THE,10)
    (INCLUDING,,2)
    (power,,1)
    (CAUSED,2)
    (de,1)
    (appropriate,1)
    (against,,1)
    (TORT,2)
    ("Source",1)
    (each,4)
    (1.,1)
    (following,10)
    (Liability.,2)
    (acceptance,1)
    ("You",1)
    (sole,1)
    (from),1)
    (See,1)
    (tracking,1)
    (for,19)
    (cause,2)
    (alleging,1)
    (obtain,1)
    (reproduce,3)
    (source,,1)
    (control,2)
    (EXEMPLARY,,2)
    (TERMS,2)
    (terms,8)
    (syntax,1)
    (SERVICES;,2)
    (made,,1)
    (BUT,4)
    (compiled,1)
    (issue,1)
    ("submitted",1)
    (OneLab,1)
    (algorithm,1)
    (was,1)
    (While,1)
    (entity,,1)
    (do,3)
    (PROVIDED,2)
    (no,2)
    (License,10)
    (entity,3)
    (Contributions.,2)
    (mean,10)
    (individual,3)
    (Institute,1)
    (computer,1)
    (notices,9)
    (Neither,1)
    (Licensor,8)
    (STRICT,2)
    (made,1)
    (authorship,,2)
    (bind,1)
    ((the,1)
    (indemnity,,1)
    (distribute,3)
    (You,24)
    (grants,2)
    (brackets,1)
    (meet,1)
    (for,,1)
    (service,1)
    (in,31)
    (trademark,,1)
    (boilerplate,1)
    (WAY,2)
    (LOSS,2)
    (distributed,3)
    (LIABILITY,,4)
    (submitted,2)
    (public,1)
    (OF,19)
    (managed,1)
    (derived,2)
    (Source,8)
    (use,,4)
    (name,2)
    (definition,,2)
    (that,25)
    (src/main/native/src/org/apache/hadoop/io/compress/lz4/{lz4.h,lz4.c,lz4hc.h,lz4hc.c},,1)
    (customary,1)
    (BSD,1)
    (thereof,1)
    (claims,2)
    (CONSEQUENTIAL,2)
    (translation,1)
    (format.,1)
    (construed,1)
    (DAMAGE.,2)
    (applicable,3)
    (binary,4)
    (regarding,1)
    (European,1)
    (excluding,3)
    (END,1)
    ((d),1)
    (choose,1)
    (NO,2)
    (BE,2)
    (direct,2)
    (retain,,1)
    (modifications,,3)
    (forum,1)
    (owner,4)
    (USE,2)
    (informational,1)
    (The,3)
    (legal,1)
    ((50%),1)
    (document.,1)
    (received,1)
    (such,17)
    (institute,1)
    (distribute,,2)
    (WHETHER,2)
    (page",1)
    ((except,1)
    (loss,1)
    (common,1)
    (additions,1)
    (BSD-style,1)
    (Appendix,1)
    (Use,1)
    (disclaimer,2)
    (resulting,1)
    (ON,2)
    (hereby,2)
    (License.,11)
    (software,3)
    (whom,1)
    (along,1)
    (lists,,1)
    (required,4)
    (OR,18)
    (ownership,2)
    (SOFTWARE,2)
    (the,122)
    (includes,1)
    (obligations,1)
    (import,,1)
    (not,11)
    (either,2)
    (terminate,1)
    (if,4)
    (stoppage,,1)
    (provided,9)
    (submitted.,1)
    (all,3)
    (permission.,1)
    ("License");,1)
    (written,2)
    (generated,2)
    (consequential,1)
    (Derivative,17)
    (AND,11)
    (rights,3)
    (http://www.apache.org/licenses/,1)
    (terms.,1)
    (Catholique,1)
    (deliberate,1)
    (entity.,2)
    (Work,,4)
    (special,,1)
    (Additional,1)
    (Legal,3)
    (034819,1)
    (least,1)
    (text,4)
    (on,11)
    (editorial,1)
    (redistributing,2)
    ("License",1)
    (against,1)
    (permission,1)
    (9,1)
    (separate,2)
    (and/or,3)
    (LICENSE,1)
    (union,1)
    ((and,1)
    (1,1)
    (including,,1)
    (Entity,3)
    (negligent,1)
    (LIABLE,2)
    (IN,6)
    (use,8)
    (enclosed,2)
    (contains,1)
    (files,1)
    (Entity",1)
    (Work.,1)
    (owner.,1)
    (preferred,1)
    (modifications,3)
    (brackets!),1)
    (available,1)
    (code,5)
    (http://www.apache.org/licenses/LICENSE-2.0,1)
    (more,1)
    (possibility,1)
    (product,1)
    (liable,1)
    (SUCH,2)
    (direction,1)
    (must,8)
    (making,1)
    (Disclaimer,1)
    (disclaimer.,2)
    (Commission,1)
    (OTHERWISE),2)
    (Hadoop,1)
    ((an,1)
    (APPENDIX:,1)
    ("Licensor",1)
    (DISCLAIMED.,2)
    ("Derivative,1)
    (elaborations,,1)
    (incidental,,1)
    (prepare,1)
    (A,3)
    (exercising,1)
    (*/,3)
    (which,2)
    (pertain,2)
    (explicitly,1)
    (tort,1)
    (3.,1)
    (also,1)
    (conversions,1)
    (liability,2)
    (whether,4)
    (character,1)
    (should,1)
    (thereof.,1)
    (of,,3)
    (your,4)
    (royalty-free,,2)
    (entities,1)
    (or,,1)
    (NEGLIGENCE,2)
    (author,1)
    ("Not,1)
    (source,9)
    (then,2)
    ((including,3)
    (Redistribution.,1)
    (attribution,4)
    (by,21)
    (TO,,4)
    (defined,1)
    (OWNER,2)
    (If,2)
    (an,6)
    (/*,1)
    (Collet.,1)
    (improving,1)
    (grossly,1)
    (COPYRIGHT,4)
    (above,,1)
    (theory,,1)
    (mailing,1)
    (7.,1)
    (Notwithstanding,1)
    (code,,2)
    (cross-claim,1)
    (provide,1)
    ((such,1)
    (arising,1)
    (Object,4)
    (In,1)
    (-,7)
    (those,3)
    (work,,2)
    (easier,1)
    (based,1)
    (medium,,1)
    (within,8)
    (worldwide,,2)
    (authorship.,1)
    (files.,1)
    (inability,1)
    (you,2)
    (POSSIBILITY,2)
    (cannot,1)
    (copies,1)
    (a,21)
    (statement,1)
    (above,4)
    (state,1)
    (work,5)
    (by,,3)
    (to,41)
    (appear.,1)
    (Your,9)
    (where,1)
    (liability.,1)
    (governing,1)
    (NOT,4)
    (License,,6)
    (hold,1)
    (and,51)
    (copyright,15)
    (USE,,3)
    (compliance,1)
    (SOFTWARE,,2)
    (comment,1)
    (additional,4)
    (executed,1)
    (mechanical,1)
    (Contributor,8)
    [spark@S1PA222 ~]$

    版权声明:本文为博主原创文章,未经博主允许不得转载。

  • 相关阅读:
    [LeetCode] 101. Symmetric Tree 对称树
    [LeetCode] 202. Happy Number 快乐数
    Windows任务计划创建计划,定时执行PowerShell命令
    读经典——《CLR via C#》(Jeffrey Richter著) 笔记_IL和验证
    读经典——《CLR via C#》(Jeffrey Richter著) 笔记_方法执行
    吐槽自己
    读经典——《CLR via C#》(Jeffrey Richter著) 笔记_元数据
    读经典——《CLR via C#》(Jeffrey Richter著) 笔记_CLR
    设置IIS,使客户端访问服务器上的文件
    SQL语句 ANSI_NULLS 值(ON|OFF)的含义
  • 原文地址:https://www.cnblogs.com/stark-summer/p/4829805.html
Copyright © 2011-2022 走看看