Hadoop Learning Path (10): Using the HDFS API

Advanced programming with the HDFS API

The HDFS API essentially comes down to two classes: Configuration and FileSystem. Configuration carries the client-side settings, and FileSystem is the handle through which every file operation goes.
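Put differently, a typical HDFS client program does only three things: build a Configuration, obtain a FileSystem from it, and call methods on that FileSystem. A minimal sketch (the hdfs://hadoop1:9000 address is the cluster used in most of the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsHello {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();          // holds all client-side configuration
        conf.set("fs.defaultFS", "hdfs://hadoop1:9000");   // which NameNode to talk to
        FileSystem fs = FileSystem.get(conf);              // the entry point for all file operations

        System.out.println(fs.exists(new Path("/")));      // e.g. check that the root directory exists
        fs.close();
    }
}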

1. Uploading and downloading files

package com.ghgj.hdfs.api;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HDFS_GET_AND_PUT {

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop1:9000");
        conf.set("dfs.replication", "2");
        FileSystem fs = FileSystem.get(conf);

        /**
         * There are two ways to change the user the client operates as:
         *
         * 1. Set the user name through the JVM options:
         *
         *     VM arguments:   -DHADOOP_USER_NAME=hadoop
         *
         * 2. Declare it in code:
         *
         *     System.setProperty("HADOOP_USER_NAME", "hadoop");
         */
        System.setProperty("HADOOP_USER_NAME", "hadoop");

        // Upload
        fs.copyFromLocalFile(new Path("c:/sss.txt"), new Path("/a/ggg.txt"));

        /**
         * .crc files are checksum files.
         *
         * The metadata of each block records only the offset range of the valid data, e.g. qqq.txt  blk_41838: 0 - 1100 bytes.
         *
         * If the data inside that range (0 - 1100) is tampered with, the checksum recomputed with the CRC algorithm
         * no longer matches the checksum stored when the file was first written, so HDFS treats the file as corrupted,
         * even though the originally valid data could still be read back.
         */

        // Download
        fs.copyToLocalFile(new Path("/a/qqq.txt"), new Path("c:/qqq3.txt"));

        /**
         * Under the hood, both the upload and the download APIs are wrappers around FileUtil.copy(...)
         * (a direct FileUtil.copy sketch follows this block).
         */

        fs.close();
    }
}
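As the comment above notes, copyFromLocalFile and copyToLocalFile are thin wrappers around FileUtil.copy. A minimal sketch of calling FileUtil.copy directly for the same upload (the paths and the hdfs://hadoop1:9000 address are just the placeholders already used above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileUtilCopySketch {
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop1:9000");

        FileSystem local = FileSystem.getLocal(conf);   // source: the local file system
        FileSystem hdfs = FileSystem.get(conf);         // destination: HDFS

        // copy(srcFS, src, dstFS, dst, deleteSource, conf)
        FileUtil.copy(local, new Path("c:/sss.txt"),
                      hdfs, new Path("/a/ggg.txt"),
                      false, conf);

        hdfs.close();
    }
}

Passing true for deleteSource would remove the source after the copy, turning it into a move.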

2. The configuration object (Configuration)

package com.exam.hdfs;

import java.util.Iterator;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class TestConf1 {

    public static void main(String[] args) throws Exception {

        /**
         * Under the hood a whole set of default configuration files is loaded:
         *
         * core-default.xml
         * hdfs-default.xml
         * mapred-default.xml
         * yarn-default.xml
         */
        Configuration conf = new Configuration();
//        conf.addResource("hdfs-default.xml");

        /**
         * In this project hdfs-site.xml sits under src, i.e. on the classpath, so when the FileSystem
         * is initialized the name/value pairs from hdfs-site.xml are parsed into conf.
         *
         * However:
         *
         * 1. If hdfs-site.xml is not under src, will it still be loaded?  No.
         *
         * 2. If the file is not named hdfs-default.xml or hdfs-site.xml, will it be loaded automatically?  No.
         *
         * Conclusion:
         *
         * For a configuration file to be picked up automatically, it must be named *-default.xml or *-site.xml
         * and it must sit under src (on the classpath).
         *
         * If it has a different name or lives somewhere else, its parameters can still be used,
         * but they must be loaded manually through the methods provided by the conf object.
         */
//        conf.addResource("hdfs-site.xml");
        conf.set("dfs.replication", "1");
        conf.addResource("myconfig/hdfs-site.xml");

        /**
         * The parameters take effect in this order, with later sources overriding earlier ones:
         *
         * 1. core/hdfs/mapred/yarn-default.xml
         *
         * 2. configuration files added through conf.addResource(...)
         *
         * 3. values set through conf.set(name, value)
         *
         * (the sketch after the program output below shows one way to confirm this per property)
         */
        FileSystem fs = FileSystem.get(conf);

        System.out.println(conf.get("dfs.replication"));

        Iterator<Entry<String, String>> iterator = conf.iterator();
        while (iterator.hasNext()) {
            Entry<String, String> e = iterator.next();
            System.out.println(e.getKey() + "\t" + e.getValue());
        }
    }
}

Output:

      1 log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
      2 log4j:WARN Please initialize the log4j system properly.
      3 log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
      4 1
      5 hadoop.security.groups.cache.secs    300
      6 dfs.datanode.cache.revocation.timeout.ms    900000
      7 dfs.namenode.resource.check.interval    5000
      8 s3.client-write-packet-size    65536
      9 dfs.client.https.need-auth    false
     10 dfs.replication    1
     11 hadoop.security.group.mapping.ldap.directory.search.timeout    10000
     12 dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold    10737418240
     13 hadoop.work.around.non.threadsafe.getpwuid    false
     14 dfs.namenode.write-lock-reporting-threshold-ms    5000
     15 fs.ftp.host.port    21
     16 dfs.namenode.avoid.read.stale.datanode    false
     17 dfs.journalnode.rpc-address    0.0.0.0:8485
     18 hadoop.security.kms.client.encrypted.key.cache.expiry    43200000
     19 ipc.client.connection.maxidletime    10000
     20 hadoop.registry.zk.session.timeout.ms    60000
     21 tfile.io.chunk.size    1048576
     22 fs.automatic.close    true
     23 ha.health-monitor.sleep-after-disconnect.ms    1000
     24 io.map.index.interval    128
     25 dfs.namenode.https-address    0.0.0.0:50470
     26 dfs.mover.max-no-move-interval    60000
     27 io.seqfile.sorter.recordlimit    1000000
     28 fs.s3n.multipart.uploads.enabled    false
     29 hadoop.util.hash.type    murmur
     30 dfs.namenode.replication.min    1
     31 dfs.datanode.directoryscan.threads    1
     32 dfs.namenode.fs-limits.min-block-size    1048576
     33 dfs.datanode.directoryscan.interval    21600
     34 fs.AbstractFileSystem.file.impl    org.apache.hadoop.fs.local.LocalFs
     35 dfs.namenode.acls.enabled    false
     36 dfs.client.short.circuit.replica.stale.threshold.ms    1800000
     37 net.topology.script.number.args    100
     38 hadoop.http.authentication.token.validity    36000
     39 fs.s3.block.size    67108864
     40 dfs.namenode.resource.du.reserved    104857600
     41 ha.failover-controller.graceful-fence.rpc-timeout.ms    5000
     42 s3native.bytes-per-checksum    512
     43 dfs.namenode.datanode.registration.ip-hostname-check    true
     44 dfs.namenode.path.based.cache.block.map.allocation.percent    0.25
     45 dfs.namenode.backup.http-address    0.0.0.0:50105
     46 hadoop.security.group.mapping    org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback
     47 dfs.namenode.edits.noeditlogchannelflush    false
     48 dfs.datanode.cache.revocation.polling.ms    500
     49 dfs.namenode.audit.loggers    default
     50 hadoop.security.groups.cache.warn.after.ms    5000
     51 io.serializations    org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization
     52 dfs.namenode.lazypersist.file.scrub.interval.sec    300
     53 fs.s3a.threads.core    15
     54 hadoop.security.crypto.buffer.size    8192
     55 hadoop.http.cross-origin.allowed-methods    GET,POST,HEAD
     56 hadoop.registry.zk.retry.interval.ms    1000
     57 dfs.http.policy    HTTP_ONLY
     58 hadoop.registry.secure    false
     59 dfs.namenode.replication.interval    3
     60 dfs.namenode.safemode.min.datanodes    0
     61 dfs.client.file-block-storage-locations.num-threads    10
     62 nfs.dump.dir    /tmp/.hdfs-nfs
     63 dfs.namenode.secondary.https-address    0.0.0.0:50091
     64 hadoop.kerberos.kinit.command    kinit
     65 dfs.block.access.token.lifetime    600
     66 dfs.webhdfs.enabled    true
     67 dfs.client.use.datanode.hostname    false
     68 dfs.namenode.delegation.token.max-lifetime    604800000
     69 fs.trash.interval    0
     70 dfs.datanode.drop.cache.behind.writes    false
     71 dfs.namenode.avoid.write.stale.datanode    false
     72 dfs.namenode.num.extra.edits.retained    1000000
     73 s3.blocksize    67108864
     74 ipc.client.connect.max.retries.on.timeouts    45
     75 dfs.datanode.data.dir    /home/hadoop/data/hadoopdata/data
     76 fs.s3.buffer.dir    ${hadoop.tmp.dir}/s3
     77 fs.s3n.block.size    67108864
     78 nfs.exports.allowed.hosts    * rw
     79 ha.health-monitor.connect-retry-interval.ms    1000
     80 hadoop.security.instrumentation.requires.admin    false
     81 hadoop.registry.zk.retry.ceiling.ms    60000
     82 nfs.rtmax    1048576
     83 dfs.client.mmap.cache.size    256
     84 dfs.datanode.data.dir.perm    700
     85 io.file.buffer.size    4096
     86 dfs.namenode.backup.address    0.0.0.0:50100
     87 dfs.client.datanode-restart.timeout    30
     88 dfs.datanode.readahead.bytes    4194304
     89 dfs.namenode.xattrs.enabled    true
     90 io.mapfile.bloom.size    1048576
     91 ipc.client.connect.retry.interval    1000
     92 dfs.client-write-packet-size    65536
     93 dfs.namenode.checkpoint.txns    1000000
     94 dfs.datanode.bp-ready.timeout    20
     95 dfs.datanode.transfer.socket.send.buffer.size    131072
     96 hadoop.security.kms.client.authentication.retry-count    1
     97 dfs.client.block.write.retries    3
     98 fs.swift.impl    org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem
     99 ha.failover-controller.graceful-fence.connection.retries    1
    100 hadoop.registry.zk.connection.timeout.ms    15000
    101 dfs.namenode.safemode.threshold-pct    0.999f
    102 dfs.cachereport.intervalMsec    10000
    103 hadoop.security.java.secure.random.algorithm    SHA1PRNG
    104 ftp.blocksize    67108864
    105 dfs.namenode.list.cache.directives.num.responses    100
    106 dfs.namenode.kerberos.principal.pattern    *
    107 file.stream-buffer-size    4096
    108 dfs.datanode.dns.nameserver    default
    109 fs.s3a.max.total.tasks    1000
    110 dfs.namenode.replication.considerLoad    true
    111 nfs.allow.insecure.ports    true
    112 dfs.namenode.edits.journal-plugin.qjournal    org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager
    113 dfs.client.write.exclude.nodes.cache.expiry.interval.millis    600000
    114 dfs.client.mmap.cache.timeout.ms    3600000
    115 ipc.client.idlethreshold    4000
    116 io.skip.checksum.errors    false
    117 ftp.stream-buffer-size    4096
    118 fs.s3a.fast.upload    false
    119 dfs.client.failover.connection.retries.on.timeouts    0
    120 file.blocksize    67108864
    121 ftp.replication    3
    122 dfs.namenode.replication.work.multiplier.per.iteration    2
    123 hadoop.security.authorization    false
    124 hadoop.http.authentication.simple.anonymous.allowed    true
    125 s3native.client-write-packet-size    65536
    126 hadoop.rpc.socket.factory.class.default    org.apache.hadoop.net.StandardSocketFactory
    127 file.bytes-per-checksum    512
    128 dfs.datanode.slow.io.warning.threshold.ms    300
    129 fs.har.impl.disable.cache    true
    130 rpc.engine.org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB    org.apache.hadoop.ipc.ProtobufRpcEngine
    131 io.seqfile.lazydecompress    true
    132 dfs.namenode.reject-unresolved-dn-topology-mapping    false
    133 hadoop.common.configuration.version    0.23.0
    134 hadoop.security.authentication    simple
    135 dfs.datanode.drop.cache.behind.reads    false
    136 dfs.image.compression.codec    org.apache.hadoop.io.compress.DefaultCodec
    137 dfs.client.read.shortcircuit.streams.cache.size    256
    138 file.replication    1
    139 dfs.namenode.top.num.users    10
    140 dfs.namenode.accesstime.precision    3600000
    141 dfs.namenode.fs-limits.max-xattrs-per-inode    32
    142 dfs.image.transfer.timeout    60000
    143 io.mapfile.bloom.error.rate    0.005
    144 nfs.wtmax    1048576
    145 hadoop.security.kms.client.encrypted.key.cache.size    500
    146 dfs.namenode.edit.log.autoroll.check.interval.ms    300000
    147 fs.s3a.multipart.purge    false
    148 dfs.namenode.support.allow.format    true
    149 hadoop.hdfs.configuration.version    1
    150 fs.s3a.connection.establish.timeout    5000
    151 hadoop.security.group.mapping.ldap.search.attr.member    member
    152 dfs.secondary.namenode.kerberos.internal.spnego.principal    ${dfs.web.authentication.kerberos.principal}
    153 dfs.stream-buffer-size    4096
    154 hadoop.ssl.client.conf    ssl-client.xml
    155 dfs.namenode.invalidate.work.pct.per.iteration    0.32f
    156 fs.s3a.multipart.purge.age    86400
    157 dfs.journalnode.https-address    0.0.0.0:8481
    158 dfs.namenode.top.enabled    true
    159 hadoop.security.kms.client.encrypted.key.cache.low-watermark    0.3f
    160 dfs.namenode.max.objects    0
    161 hadoop.user.group.static.mapping.overrides    dr.who=;
    162 fs.s3a.fast.buffer.size    1048576
    163 dfs.bytes-per-checksum    512
    164 dfs.datanode.max.transfer.threads    4096
    165 dfs.block.access.key.update.interval    600
    166 ipc.maximum.data.length    67108864
    167 tfile.fs.input.buffer.size    262144
    168 ha.failover-controller.new-active.rpc-timeout.ms    60000
    169 dfs.client.cached.conn.retry    3
    170 dfs.client.read.shortcircuit    false
    171 hadoop.ssl.hostname.verifier    DEFAULT
    172 dfs.datanode.hdfs-blocks-metadata.enabled    false
    173 dfs.datanode.directoryscan.throttle.limit.ms.per.sec    0
    174 dfs.image.transfer.chunksize    65536
    175 hadoop.http.authentication.type    simple
    176 dfs.namenode.list.encryption.zones.num.responses    100
    177 dfs.client.https.keystore.resource    ssl-client.xml
    178 s3native.blocksize    67108864
    179 net.topology.impl    org.apache.hadoop.net.NetworkTopology
    180 dfs.client.failover.sleep.base.millis    500
    181 io.seqfile.compress.blocksize    1000000
    182 dfs.namenode.path.based.cache.refresh.interval.ms    30000
    183 dfs.namenode.decommission.interval    30
    184 dfs.permissions.superusergroup    supergroup
    185 dfs.namenode.fs-limits.max-directory-items    1048576
    186 hadoop.registry.zk.retry.times    5
    187 dfs.ha.log-roll.period    120
    188 fs.AbstractFileSystem.ftp.impl    org.apache.hadoop.fs.ftp.FtpFs
    189 ftp.bytes-per-checksum    512
    190 dfs.user.home.dir.prefix    /user
    191 dfs.namenode.checkpoint.edits.dir    ${dfs.namenode.checkpoint.dir}
    192 dfs.client.socket.send.buffer.size    131072
    193 ipc.client.fallback-to-simple-auth-allowed    false
    194 dfs.blockreport.initialDelay    0
    195 dfs.namenode.inotify.max.events.per.rpc    1000
    196 dfs.namenode.heartbeat.recheck-interval    300000
    197 dfs.namenode.safemode.extension    30000
    198 dfs.client.failover.sleep.max.millis    15000
    199 dfs.namenode.delegation.key.update-interval    86400000
    200 dfs.datanode.transfer.socket.recv.buffer.size    131072
    201 hadoop.rpc.protection    authentication
    202 fs.permissions.umask-mode    022
    203 fs.s3.sleepTimeSeconds    10
    204 dfs.namenode.fs-limits.max-xattr-size    16384
    205 ha.health-monitor.rpc-timeout.ms    45000
    206 hadoop.http.staticuser.user    dr.who
    207 dfs.datanode.http.address    0.0.0.0:50075
    208 fs.s3a.connection.maximum    15
    209 fs.s3a.paging.maximum    5000
    210 fs.AbstractFileSystem.viewfs.impl    org.apache.hadoop.fs.viewfs.ViewFs
    211 dfs.namenode.blocks.per.postponedblocks.rescan    10000
    212 fs.ftp.host    0.0.0.0
    213 dfs.lock.suppress.warning.interval    10s
    214 hadoop.http.authentication.kerberos.keytab    ${user.home}/hadoop.keytab
    215 fs.s3a.impl    org.apache.hadoop.fs.s3a.S3AFileSystem
    216 hadoop.registry.zk.root    /registry
    217 hadoop.jetty.logs.serve.aliases    true
    218 dfs.namenode.fs-limits.max-blocks-per-file    1048576
    219 dfs.balancer.keytab.enabled    false
    220 dfs.client.block.write.replace-datanode-on-failure.enable    true
    221 hadoop.http.cross-origin.max-age    1800
    222 io.compression.codec.bzip2.library    system-native
    223 dfs.namenode.checkpoint.dir    file://${hadoop.tmp.dir}/dfs/namesecondary
    224 dfs.client.use.legacy.blockreader.local    false
    225 dfs.namenode.top.windows.minutes    1,5,25
    226 ipc.ping.interval    60000
    227 net.topology.node.switch.mapping.impl    org.apache.hadoop.net.ScriptBasedMapping
    228 nfs.mountd.port    4242
    229 dfs.storage.policy.enabled    true
    230 dfs.namenode.list.cache.pools.num.responses    100
    231 fs.df.interval    60000
    232 nfs.server.port    2049
    233 ha.zookeeper.parent-znode    /hadoop-ha
    234 hadoop.http.cross-origin.allowed-headers    X-Requested-With,Content-Type,Accept,Origin
    235 dfs.datanode.block-pinning.enabled    false
    236 dfs.namenode.num.checkpoints.retained    2
    237 fs.s3a.attempts.maximum    10
    238 s3native.stream-buffer-size    4096
    239 io.seqfile.local.dir    ${hadoop.tmp.dir}/io/local
    240 fs.s3n.multipart.copy.block.size    5368709120
    241 dfs.encrypt.data.transfer.cipher.key.bitlength    128
    242 dfs.client.mmap.retry.timeout.ms    300000
    243 dfs.datanode.sync.behind.writes    false
    244 dfs.namenode.fslock.fair    true
    245 hadoop.ssl.keystores.factory.class    org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory
    246 dfs.permissions.enabled    true
    247 fs.AbstractFileSystem.hdfs.impl    org.apache.hadoop.fs.Hdfs
    248 dfs.blockreport.split.threshold    1000000
    249 dfs.datanode.balance.bandwidthPerSec    1048576
    250 dfs.block.scanner.volume.bytes.per.second    1048576
    251 hadoop.security.random.device.file.path    /dev/urandom
    252 fs.s3.maxRetries    4
    253 hadoop.http.filter.initializers    org.apache.hadoop.http.lib.StaticUserWebFilter
    254 dfs.namenode.stale.datanode.interval    30000
    255 ipc.client.rpc-timeout.ms    0
    256 fs.client.resolve.remote.symlinks    true
    257 dfs.default.chunk.view.size    32768
    258 hadoop.ssl.enabled.protocols    TLSv1
    259 dfs.namenode.decommission.blocks.per.interval    500000
    260 dfs.namenode.handler.count    10
    261 dfs.image.transfer.bandwidthPerSec    0
    262 rpc.metrics.quantile.enable    false
    263 hadoop.ssl.enabled    false
    264 dfs.replication.max    512
    265 dfs.namenode.name.dir    /home/hadoop/data/hadoopdata/name
    266 dfs.namenode.read-lock-reporting-threshold-ms    5000
    267 dfs.datanode.https.address    0.0.0.0:50475
    268 dfs.datanode.failed.volumes.tolerated    0
    269 ipc.client.kill.max    10
    270 fs.s3a.threads.max    256
    271 ipc.server.listen.queue.size    128
    272 dfs.client.domain.socket.data.traffic    false
    273 dfs.block.access.token.enable    false
    274 dfs.blocksize    134217728
    275 fs.s3a.connection.timeout    50000
    276 fs.s3a.threads.keepalivetime    60
    277 file.client-write-packet-size    65536
    278 dfs.datanode.address    0.0.0.0:50010
    279 ha.failover-controller.cli-check.rpc-timeout.ms    20000
    280 ha.zookeeper.acl    world:anyone:rwcda
    281 ipc.client.connect.max.retries    10
    282 dfs.encrypt.data.transfer    false
    283 dfs.namenode.write.stale.datanode.ratio    0.5f
    284 ipc.client.ping    true
    285 dfs.datanode.shared.file.descriptor.paths    /dev/shm,/tmp
    286 dfs.short.circuit.shared.memory.watcher.interrupt.check.ms    60000
    287 hadoop.tmp.dir    /home/hadoop/data/hadoopdata
    288 dfs.datanode.handler.count    10
    289 dfs.client.failover.max.attempts    15
    290 dfs.balancer.max-no-move-interval    60000
    291 dfs.client.read.shortcircuit.streams.cache.expiry.ms    300000
    292 dfs.namenode.block-placement-policy.default.prefer-local-node    true
    293 hadoop.ssl.require.client.cert    false
    294 hadoop.security.uid.cache.secs    14400
    295 dfs.client.read.shortcircuit.skip.checksum    false
    296 dfs.namenode.resource.checked.volumes.minimum    1
    297 hadoop.registry.rm.enabled    false
    298 dfs.namenode.quota.init-threads    4
    299 dfs.namenode.max.extra.edits.segments.retained    10000
    300 dfs.webhdfs.user.provider.user.pattern    ^[A-Za-z_][A-Za-z0-9._-]*[$]?$
    301 dfs.client.mmap.enabled    true
    302 dfs.client.file-block-storage-locations.timeout.millis    1000
    303 dfs.datanode.block.id.layout.upgrade.threads    12
    304 dfs.datanode.use.datanode.hostname    false
    305 hadoop.fuse.timer.period    5
    306 dfs.client.context    default
    307 fs.trash.checkpoint.interval    0
    308 dfs.journalnode.http-address    0.0.0.0:8480
    309 dfs.balancer.address    0.0.0.0:0
    310 dfs.namenode.lock.detailed-metrics.enabled    false
    311 dfs.namenode.delegation.token.renew-interval    86400000
    312 ha.health-monitor.check-interval.ms    1000
    313 dfs.namenode.retrycache.heap.percent    0.03f
    314 ipc.client.connect.timeout    20000
    315 dfs.reformat.disabled    false
    316 dfs.blockreport.intervalMsec    21600000
    317 fs.s3a.multipart.threshold    2147483647
    318 dfs.https.server.keystore.resource    ssl-server.xml
    319 hadoop.http.cross-origin.enabled    false
    320 io.map.index.skip    0
    321 dfs.balancer.block-move.timeout    0
    322 io.native.lib.available    true
    323 s3.replication    3
    324 dfs.namenode.kerberos.internal.spnego.principal    ${dfs.web.authentication.kerberos.principal}
    325 fs.AbstractFileSystem.har.impl    org.apache.hadoop.fs.HarFs
    326 hadoop.security.kms.client.encrypted.key.cache.num.refill.threads    2
    327 fs.s3n.multipart.uploads.block.size    67108864
    328 dfs.image.compress    false
    329 dfs.datanode.dns.interface    default
    330 dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction    0.75f
    331 tfile.fs.output.buffer.size    262144
    332 fs.du.interval    600000
    333 dfs.client.failover.connection.retries    0
    334 dfs.namenode.edit.log.autoroll.multiplier.threshold    2.0
    335 hadoop.security.group.mapping.ldap.ssl    false
    336 dfs.namenode.top.window.num.buckets    10
    337 fs.s3a.buffer.dir    ${hadoop.tmp.dir}/s3a
    338 dfs.namenode.checkpoint.check.period    60
    339 fs.defaultFS    hdfs://hadoop1:9000
    340 fs.s3a.multipart.size    104857600
    341 dfs.client.slow.io.warning.threshold.ms    30000
    342 dfs.datanode.max.locked.memory    0
    343 dfs.namenode.retrycache.expirytime.millis    600000
    344 hadoop.security.group.mapping.ldap.search.attr.group.name    cn
    345 dfs.client.block.write.replace-datanode-on-failure.best-effort    false
    346 dfs.ha.fencing.ssh.connect-timeout    30000
    347 dfs.datanode.scan.period.hours    504
    348 hadoop.registry.zk.quorum    localhost:2181
    349 dfs.namenode.fs-limits.max-component-length    255
    350 hadoop.http.cross-origin.allowed-origins    *
    351 dfs.namenode.enable.retrycache    true
    352 dfs.datanode.du.reserved    0
    353 dfs.datanode.ipc.address    0.0.0.0:50020
    354 hadoop.registry.system.acls    sasl:yarn@, sasl:mapred@, sasl:hdfs@
    355 dfs.namenode.path.based.cache.retry.interval.ms    30000
    356 hadoop.security.crypto.cipher.suite    AES/CTR/NoPadding
    357 dfs.client.block.write.replace-datanode-on-failure.policy    DEFAULT
    358 dfs.namenode.http-address    0.0.0.0:50070
    359 hadoop.security.crypto.codec.classes.aes.ctr.nopadding    org.apache.hadoop.crypto.OpensslAesCtrCryptoCodec,org.apache.hadoop.crypto.JceAesCtrCryptoCodec
    360 dfs.ha.tail-edits.period    60
    361 hadoop.security.groups.negative-cache.secs    30
    362 hadoop.ssl.server.conf    ssl-server.xml
    363 hadoop.registry.jaas.context    Client
    364 s3native.replication    3
    365 hadoop.security.group.mapping.ldap.search.filter.group    (objectClass=group)
    366 hadoop.http.authentication.kerberos.principal    HTTP/_HOST@LOCALHOST
    367 dfs.namenode.startup.delay.block.deletion.sec    0
    368 hadoop.security.group.mapping.ldap.search.filter.user    (&(objectClass=user)(sAMAccountName={0}))
    369 dfs.namenode.edits.dir    ${dfs.namenode.name.dir}
    370 dfs.namenode.checkpoint.max-retries    3
    371 s3.stream-buffer-size    4096
    372 ftp.client-write-packet-size    65536
    373 dfs.datanode.fsdatasetcache.max.threads.per.volume    4
    374 hadoop.security.sensitive-config-keys    password$,fs.s3.*[Ss]ecret.?[Kk]ey,fs.azure.account.key.*,dfs.webhdfs.oauth2.[a-z]+.token,hadoop.security.sensitive-config-keys
    375 dfs.namenode.decommission.max.concurrent.tracked.nodes    100
    376 dfs.namenode.name.dir.restore    false
    377 ipc.server.log.slow.rpc    false
    378 dfs.heartbeat.interval    3
    379 dfs.namenode.secondary.http-address    hadoop3:50090
    380 ha.zookeeper.session-timeout.ms    5000
    381 s3.bytes-per-checksum    512
    382 fs.s3a.connection.ssl.enabled    true
    383 hadoop.http.authentication.signature.secret.file    ${user.home}/hadoop-http-auth-signature-secret
    384 hadoop.fuse.connection.timeout    300
    385 dfs.namenode.checkpoint.period    3600
    386 ipc.server.max.connections    0
    387 dfs.ha.automatic-failover.enabled    false
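To verify where a given value actually came from (a *-default.xml file, an added resource, or a conf.set call), Configuration can report a property's sources. A small sketch, under the assumption that getPropertySources is available in the Hadoop 2.x client library used here:

import org.apache.hadoop.conf.Configuration;

public class ShowPropertySource {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");   // programmatic override, as in TestConf1 above

        System.out.println(conf.get("dfs.replication"));

        // getPropertySources reports which source supplied the value currently in effect
        // (a *-default.xml / *-site.xml resource, an added resource, or a value set in code).
        String[] sources = conf.getPropertySources("dfs.replication");
        if (sources != null) {
            for (String source : sources) {
                System.out.println("dfs.replication came from: " + source);
            }
        }
    }
}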

3. Listing the files under a directory together with their block information

package com.exam.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class TestHDFS1 {

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        conf.set("fs.defaultFS", "hdfs://hadoop1:9000");
        FileSystem fs = FileSystem.get(conf);

        /**
         * List all files under the given directory (the second argument true makes the listing recursive).
         */
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus file = listFiles.next();

            System.out.println(file.getPath() + "\t");
            System.out.println(file.getPath().getName() + "\t");
            System.out.println(file.getLen() + "\t");
            System.out.println(file.getReplication() + "\t");

            /**
             * What is the length of blockLocations, and what does it mean?
             *
             * It is the number of blocks the file consists of.
             */
            BlockLocation[] blockLocations = file.getBlockLocations();
            System.out.println(blockLocations.length + "\t");

            // Print the first two replica hosts of each block (replication is 2 in this cluster)
            for (BlockLocation bl : blockLocations) {
                String[] hosts = bl.getHosts();
                System.out.print(hosts[0] + "-" + hosts[1] + "\t");
            }
            System.out.println();
        }
    }
}

Output:

hdfs://hadoop1:9000/aa/bb/cc/hadoop.tar.gz
hadoop.tar.gz
199007110
2
3
hadoop3-hadoop1    hadoop1-hadoop2    hadoop1-hadoop4

4. Uploading a file with a stream

package com.exam.hdfs;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class UploadDataByStream {

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        conf.set("fs.defaultFS", "hdfs://hadoop1:9000");
        FileSystem fs = FileSystem.get(conf);

        // Open a local input stream and an HDFS output stream
        // (fs.create also has overloads for per-file replication and block size; see the sketch after this class)
        InputStream in = new FileInputStream(new File("d:/abc.tar.gz"));
        FSDataOutputStream out = fs.create(new Path("/aa/abc.tar.gz"));

        // Copy with a 4096-byte buffer and close both streams when done
        IOUtils.copyBytes(in, out, 4096, true);

        fs.close();
    }
}
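One advantage of uploading through fs.create instead of copyFromLocalFile is that create has overloads for choosing the replication and block size of this particular file, which is also why exercise 12 below must not assume a single block size for all files. A hedged sketch of that overload (the file name and sizes are arbitrary examples, not from the original post):

import java.io.FileInputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class UploadWithCustomBlockSize {
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop1:9000");
        FileSystem fs = FileSystem.get(conf);

        // create(Path, overwrite, bufferSize, replication, blockSize): this file gets
        // 2 replicas and a 64 MB block size regardless of the cluster-wide defaults.
        FSDataOutputStream out = fs.create(new Path("/aa/abc_custom.tar.gz"),
                true, 4096, (short) 2, 64L * 1024 * 1024);

        IOUtils.copyBytes(new FileInputStream("d:/abc.tar.gz"), out, 4096, true);
        fs.close();
    }
}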

5. Downloading a file with a stream

package com.exam.hdfs;

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class DownloadDataByStream {

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        conf.set("fs.defaultFS", "hdfs://hadoop1:9000");
        FileSystem fs = FileSystem.get(conf);

        // Open an HDFS input stream and a local output stream
        FSDataInputStream in = fs.open(new Path("/aa/abc.tar.gz"));
        OutputStream out = new FileOutputStream(new File("D:/abc.sh"));

        // Copy with a 4096-byte buffer and close both streams when done
        IOUtils.copyBytes(in, out, 4096, true);

        fs.close();
    }
}

6. Deleting files of a particular type (for example .class or .txt files) under a given path

package com.exam.hdfs;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HDFS_DELETE_CLASS {

    public static final String FILETYPE = "tar.gz";
    public static final String DELETE_PATH = "/aa";

    public static void main(String[] args) throws Exception {

        new HDFS_DELETE_CLASS().rmrClassFile(new Path(DELETE_PATH));
    }

    public void rmrClassFile(Path path) throws Exception {

        // First gather the cluster information needed to obtain a FileSystem instance fs
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop1:9000"), conf, "hadoop");

        // Check whether path itself is a file or a directory
        FileStatus fileStatus = fs.getFileStatus(path);
        boolean directory = fileStatus.isDirectory();

        // Handle it according to whether it is a directory or a file
        if (directory) {
            // It is a directory
            checkAndDeleteDirectory(path, fs);
        } else {
            // It is a file: check whether its name ends with FILETYPE
            checkAndDeleteFile(path, fs);
        }
    }

    // Handle a directory
    public static void checkAndDeleteDirectory(Path path, FileSystem fs) throws Exception {
        // Look at the immediate children (files and subdirectories) of path
        FileStatus[] listStatus = fs.listStatus(path);
        for (FileStatus fStatus : listStatus) {
            Path p = fStatus.getPath();
            // If it is a file ending with FILETYPE, delete it; otherwise keep walking down the tree
            if (fStatus.isFile()) {
                checkAndDeleteFile(p, fs);
            } else {
                checkAndDeleteDirectory(p, fs);
            }
        }
    }

    // Check whether a file matches the deletion rule; delete it if it does, leave it alone otherwise
    // (a PathFilter-based variant of the same check is sketched after this class)
    public static void checkAndDeleteFile(Path path, FileSystem fs) throws Exception {
        String name = path.getName();
        System.out.println(name);
        /*// Simply checking whether the name contains FILETYPE is not reliable and could delete the
        // wrong files, so the check must be that the name ends with FILETYPE
        if(name.indexOf(FILETYPE) != -1){
            fs.delete(path, true);
        }*/
        // Check whether the name ends with FILETYPE (equivalent to name.endsWith(FILETYPE))
        if (name.length() < FILETYPE.length()) {
            return;
        }
        int startIndex = name.length() - FILETYPE.length();
        int endIndex = name.length();
        // Extract the file suffix
        String fileSuffix = name.substring(startIndex, endIndex);
        if (fileSuffix.equals(FILETYPE)) {
            fs.delete(path, true);
        }
    }
}
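The class above walks the tree by hand and tests the suffix itself. As an alternative sketch, the suffix test can be handed to a PathFilter when listing each directory level; the URI, user name, and /aa path are the same placeholders used above:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class DeleteBySuffixWithFilter {

    private static final String FILETYPE = "tar.gz";

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop1:9000"), conf, "hadoop");
        deleteMatching(fs, new Path("/aa"));
        fs.close();
    }

    private static void deleteMatching(FileSystem fs, Path dir) throws Exception {
        // Only children whose names end with the suffix pass the filter
        PathFilter suffixFilter = new PathFilter() {
            @Override
            public boolean accept(Path p) {
                return p.getName().endsWith(FILETYPE);
            }
        };
        for (FileStatus match : fs.listStatus(dir, suffixFilter)) {
            if (match.isFile()) {
                fs.delete(match.getPath(), true);
            }
        }
        // Recurse into subdirectories
        for (FileStatus child : fs.listStatus(dir)) {
            if (child.isDirectory()) {
                deleteMatching(fs, child.getPath());
            }
        }
    }
}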

7. Deleting all empty files and empty directories in the HDFS cluster

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class DeleteEmptyDirAndFile {

    static FileSystem fs = null;

    public static void main(String[] args) throws Exception {

        initFileSystem();

        // Create test data
//        makeTestData();

        // Delete the test data
//        deleteTestData();

        // Delete the empty files and empty directories under the given directory
        deleteEmptyDirAndFile(new Path("/aa"));
    }

    /**
     * Delete the empty files and empty directories under the given directory
     * @throws Exception
     */
    public static void deleteEmptyDirAndFile(Path path) throws Exception {

        // The directory itself is empty
        FileStatus[] listStatus = fs.listStatus(path);
        if (listStatus.length == 0) {
            fs.delete(path, true);
            return;
        }

        // listLocatedStatus returns both the files and the subdirectories of the given directory
        RemoteIterator<LocatedFileStatus> listLocatedStatus = fs.listLocatedStatus(path);

        while (listLocatedStatus.hasNext()) {
            LocatedFileStatus next = listLocatedStatus.next();

            Path currentPath = next.getPath();
            // The parent directory
            Path parent = next.getPath().getParent();

            // If it is a directory, keep walking down and delete whatever qualifies (empty directories)
            if (next.isDirectory()) {

                // An empty directory
                if (fs.listStatus(currentPath).length == 0) {
                    // Delete it
                    fs.delete(currentPath, true);
                } else {
                    // Not empty, so keep traversing
                    if (fs.exists(currentPath)) {
                        deleteEmptyDirAndFile(currentPath);
                    }
                }

            // If it is a file
            } else {
                // Get the file length
                long fileLength = next.getLen();
                // An empty file: delete it
                if (fileLength == 0) {
                    fs.delete(currentPath, true);
                }
            }

            // Deleting an empty file or directory may leave the parent directory empty as well,
            // so after every deletion check the parent and delete it too if it has become empty
            int length = fs.listStatus(parent).length;
            if (length == 0) {
                fs.delete(parent, true);
            }
        }
    }

    /**
     * Initialize the FileSystem object
     */
    public static void initFileSystem() throws Exception {
        Configuration conf = new Configuration();
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        conf.addResource("config/core-site.xml");
        conf.addResource("config/hdfs-site.xml");
        fs = FileSystem.get(conf);
    }

    /**
     * Create test data
     */
    public static void makeTestData() throws Exception {

        String emptyFilePath = "D:\\bigdata\\1704mr_test\\empty.txt";
        String notEmptyFilePath = "D:\\bigdata\\1704mr_test\\notEmpty.txt";

        // Directory containing an empty directory and an empty file
        String path1 = "/aa/bb1/cc1/dd1/";
        fs.mkdirs(new Path(path1));
        fs.mkdirs(new Path("/aa/bb1/cc1/dd2/"));
        fs.copyFromLocalFile(new Path(emptyFilePath), new Path(path1));
        fs.copyFromLocalFile(new Path(notEmptyFilePath), new Path(path1));

        // Directory containing an empty file
        String path2 = "/aa/bb1/cc2/dd2/";
        fs.mkdirs(new Path(path2));
        fs.copyFromLocalFile(new Path(emptyFilePath), new Path(path2));

        // Directory containing a non-empty file
        String path3 = "/aa/bb2/cc3/dd3";
        fs.mkdirs(new Path(path3));
        fs.copyFromLocalFile(new Path(notEmptyFilePath), new Path(path3));

        // An empty directory
        String path4 = "/aa/bb2/cc4/dd4";
        fs.mkdirs(new Path(path4));

        System.out.println("Test data created");
    }

    /**
     * Delete the test directory
     * @throws Exception
     */
    public static void deleteTestData() throws Exception {
        boolean delete = fs.delete(new Path("/aa"), true);
        System.out.println(delete ? "Test data deleted" : "Failed to delete test data");
    }

}

8. Manually copying a specific data block (for example, the second block of a file)

    /**
     * Manually copy a specific data block of a file (for example, the second block).
     * Assumes fs is an already-initialized FileSystem (see initFileSystem() in the previous example);
     * str is the HDFS file path and num is the 1-based index of the block to copy.
     */
    public static void copyBlock(String str, int num) {

        Path path = new Path(str);

        BlockLocation[] locations = new BlockLocation[0];

        try {
            FileStatus fileStatus = fs.getFileStatus(path);

            locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());

            /*for(int i = 0; i < locations.length; i++) {
                // 0,134217728,hadoop1,hadoop3
                // 134217728,64789382,hadoop3,hadoop1
                System.out.println(locations[i]);
            }*/

            /*System.out.println(locations[num-1].getOffset());
            System.out.println(locations[num-1].getLength());
            String[] hosts = locations[num-1].getHosts();*/

            // Seek to the start of the requested block and copy exactly that block's bytes
            FSDataInputStream open = fs.open(path);
            open.seek(locations[num - 1].getOffset());
            OutputStream out = new FileOutputStream(new File("D:/abc.tar.gz"));
            IOUtils.copyBytes(open, out, locations[num - 1].getLength(), true);

        } catch (IOException e) {
            e.printStackTrace();
        }
    }

9. Write a program that computes the proportion of files in HDFS smaller than the cluster's default block size

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * Compute the proportion of files in HDFS that are smaller than the cluster's default block size.
 * For example: 98 files are >= 128 MB and 2 files are < 128 MB, so the answer is 2%.
 */
public class Exam1_SmallFilePercent {

    private static int DEFAULT_BLOCKSIZE = 128 * 1024 * 1024;

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop1:9000");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("/");
        float smallFilePercent = getSmallFilePercent(fs, path);
        System.out.println(smallFilePercent);

        fs.close();
    }

    /**
     * Compute the ratio of small files to total files under the given directory.
     * @throws Exception
     */
    private static float getSmallFilePercent(FileSystem fs, Path path) throws Exception {

        int smallFile = 0;
        int totalFile = 0;

        // Pass true so the whole directory tree is scanned, not just the top level
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(path, true);
        while (listFiles.hasNext()) {
            totalFile++;
            LocatedFileStatus next = listFiles.next();
            long len = next.getLen();
            if (len < DEFAULT_BLOCKSIZE) {
                smallFile++;
            }
        }
        System.out.println(smallFile + " : " + totalFile);

        return smallFile * 1f / totalFile;
    }

}

10. Write a program that computes the average number of data blocks per file in HDFS (total blocks / total files)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * Compute the average number of data blocks per file in HDFS (total blocks / total files).
 * For example: one file has 5 blocks and another has 3 blocks, so the average is 4.
 * If there is a third file with just 1 block, the average over the whole of HDFS becomes 3.
 */
public class Exam2_HDSFAvgBlocks {

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop1:9000");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("/");
        float avgHDFSBlocks = getHDFSAvgBlocks(fs, path);
        System.out.println("Average number of blocks per file in HDFS: " + avgHDFSBlocks);

        fs.close();
    }

    /**
     * Compute the average number of blocks per file under the given directory.
     */
    private static float getHDFSAvgBlocks(FileSystem fs, Path path) throws Exception {

        int totalFiles = 0;     // total number of files
        int totalBlocks = 0;    // total number of blocks

        // Pass true so the whole directory tree is scanned, not just the top level
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(path, true);

        while (listFiles.hasNext()) {
            LocatedFileStatus next = listFiles.next();
            int length = next.getBlockLocations().length;
            totalBlocks += length;
            // Only count non-empty files (empty files have no blocks)
            if (next.getLen() != 0) {
                totalFiles++;
            }
        }
        System.out.println(totalBlocks + " : " + totalFiles);

        return totalBlocks * 1f / totalFiles;
    }

}

11. Write a program that computes the average replication factor in HDFS (total replicas / total blocks)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * Compute the average replication factor in HDFS (total replicas / total blocks).
 * For example: with two files, one having 5 blocks at 3 replicas each and the other having 2 blocks
 * at 2 replicas each, the average is (5*3 + 2*2) / (5+2) = 19/7 ≈ 2.71.
 */
public class Exam3_HDSFAvgBlockCopys {

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop02:9000");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("/");
        float avgHDFSBlockCopys = getHDFSAvgBlockCopys(fs, path);
        System.out.println("Average number of replicas per block in HDFS: " + avgHDFSBlockCopys);

        fs.close();
    }

    /**
     * Compute the average number of replicas per block under the given directory.
     */
    private static float getHDFSAvgBlockCopys(FileSystem fs, Path path) throws Exception {

        int totalCopy = 0;      // total number of replicas
        int totalBlocks = 0;    // total number of blocks

        // Pass true so the whole directory tree is scanned, not just the top level
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(path, true);

        while (listFiles.hasNext()) {
            LocatedFileStatus next = listFiles.next();

            int length = next.getBlockLocations().length;
            short replication = next.getReplication();

            totalBlocks += length;
            totalCopy += length * replication;
        }
        System.out.println(totalCopy + " : " + totalBlocks);

        return totalCopy * 1f / totalBlocks;
    }

}

12. Compute the proportion of blocks in HDFS that are smaller than the block size specified for their file

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * Compute the proportion of blocks in HDFS that are smaller than the block size configured for their file.
 * For example: if the block size is 128 MB, there are 100 blocks in total and 5 of them are not a full
 * 128 MB, then the proportion is 5%.
 * Note: different files may have been written with different block sizes, so never assume the default
 * 128 MB for every file; use the block size recorded on each file instead.
 */
public class Exam4_LTBlockSize {

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop02:9000");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("/");
        float lessThanBlockSizeRatio = getLessThanBlocksizeBlocks(fs, path);
        System.out.println("Proportion of blocks smaller than their file's block size: " + lessThanBlockSizeRatio);

        fs.close();
    }

    private static float getLessThanBlocksizeBlocks(FileSystem fs, Path path) throws Exception {

        int totalBlocks = 0;                // total number of blocks
        int lessThanBlocksizeBlocks = 0;    // number of blocks smaller than the file's block size

        // Pass true so the whole directory tree is scanned, not just the top level
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(path, true);

        while (listFiles.hasNext()) {
            LocatedFileStatus next = listFiles.next();

            BlockLocation[] blockLocations = next.getBlockLocations();
            int length = blockLocations.length;

            if (length != 0) {
                totalBlocks += length;
                // Only the last block of a file can be smaller than the file's block size
                long lastBlockSize = blockLocations[length - 1].getLength();
                long blockSize = next.getBlockSize();
                if (lastBlockSize < blockSize) {
                    lessThanBlocksizeBlocks++;
                }
            }
        }
        System.out.println(lessThanBlocksizeBlocks + " : " + totalBlocks);

        return lessThanBlocksizeBlocks * 1f / totalBlocks;
    }
}

13. Compute the total amount of water a given array can trap (treating each value as terrain height)

/**
 * Compute how much water a given array can trap (treating each value as terrain height).
 * For example: int[] intArray = new int[]{4,3,2,5,6,4,4,7}
 * can trap [0,1,2,0,0,2,2,0], so the total is 7.
 *
 * Core idea: slice the array into many 0/1 arrays, one per height level, and for each 0/1 array
 * count the "valid" zeros, i.e. the zeros that lie between the leftmost 1 and the rightmost 1
 * (an O(n) two-pointer variant is sketched after this class).
 */
public class Exam5_WaterStoreOfArray {

    public static void main(String[] args) {

//        int[] intArray = new int[]{4,3,2,5,6,4,4,7};
//        int[] intArray = new int[]{1,2,3,4,5,6};
        int[] intArray = new int[]{3,1,2,7,3,8,4,9,5,6};

        int totalWater = getArrayWater(intArray);
        System.out.println(totalWater);
    }

    /**
     * Compute the amount of water trapped by the array.
     */
    private static int getArrayWater(int[] intArray) {

        int maxValueOfArray = findMaxValueOfArray(intArray);
        int minValueOfArray = findMinValueOfArray(intArray);
        int length = intArray.length;

        int totalWater = 0;

        // The number of layers is the difference between the maximum and the minimum
        for (int i = minValueOfArray; i < maxValueOfArray; i++) {
            // Build the 0/1 array for this layer
            int[] tempArray = new int[length];
            for (int j = 0; j < length; j++) {
                if (intArray[j] > i) {
                    tempArray[j] = 1;
                } else {
                    tempArray[j] = 0;
                }
            }
            // Count the valid zeros of this 0/1 array
            int waterOfOneZeroArray = getWaterOfOneZeroArray(tempArray);
            totalWater += waterOfOneZeroArray;
        }
        return totalWater;
    }

    /**
     * Find the leftmost 1 and the rightmost 1; every 0 between them counts as water.
     */
    private static int getWaterOfOneZeroArray(int[] tempArray) {

        int length = tempArray.length;
        int totalWater = 0;

        // Find the leftmost 1
        int i = 0;
        while (i < length) {
            if (tempArray[i] == 1) {
                break;
            }
            i++;
        }

        // Find the rightmost 1
        int j = length - 1;
        while (j >= i) {
            if (tempArray[j] == 1) {
                break;
            }
            j--;
        }

        // Count the zeros between the two 1s
        if (i == j || i + 1 == j) {
            return 0;
        } else {
            for (int k = i + 1; k < j; k++) {
                if (tempArray[k] == 0) {
                    totalWater++;
                }
            }
            return totalWater;
        }
    }

    /**
     * Find the maximum value of an array.
     */
    public static int findMaxValueOfArray(int[] intArray) {
        int length = intArray.length;
        if (length == 0) {
            return 0;
        } else if (length == 1) {
            return intArray[0];
        } else {
            int max = intArray[0];
            for (int i = 1; i < length; i++) {
                if (intArray[i] > max) {
                    max = intArray[i];
                }
            }
            return max;
        }
    }

    /**
     * Find the minimum value of an array.
     */
    public static int findMinValueOfArray(int[] intArray) {
        int length = intArray.length;
        if (length == 0) {
            return 0;
        } else if (length == 1) {
            return intArray[0];
        } else {
            int min = intArray[0];
            for (int i = 1; i < length; i++) {
                if (intArray[i] < min) {
                    min = intArray[i];
                }
            }
            return min;
        }
    }
}
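The layered approach above costs O(n × (max − min)) time because it rebuilds a 0/1 array for every height level. For comparison, here is a sketch of the standard two-pointer formulation of the same problem, which produces the same totals in O(n) time with O(1) extra space (class and method names are illustrative, not from the original post):

public class WaterTwoPointerSketch {

    public static void main(String[] args) {
        int[] intArray = new int[]{4, 3, 2, 5, 6, 4, 4, 7};
        System.out.println(trap(intArray));   // expected: 7, matching the layered version
    }

    /**
     * Walk inward from both ends; the lower of the two running maxima bounds
     * how much water the current position can hold.
     */
    public static int trap(int[] height) {
        int left = 0, right = height.length - 1;
        int leftMax = 0, rightMax = 0;
        int total = 0;

        while (left < right) {
            if (height[left] < height[right]) {
                leftMax = Math.max(leftMax, height[left]);
                total += leftMax - height[left];   // water above the current left position
                left++;
            } else {
                rightMax = Math.max(rightMax, height[right]);
                total += rightMax - height[right]; // water above the current right position
                right--;
            }
        }
        return total;
    }
}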
Original article: https://www.cnblogs.com/qingyunzong/p/8548727.html