zoukankan      html  css  js  c++  java
  • Hadoop文件解压缩

    Class
    org.apache.hadoop.io.compress.CompressionCodecFactory
    A factory that will find the correct codec for a given filename.

    Method
    CompressionCodec getCodec(Path file)
    Find the relevant compression codec for the given file based on its filename suffix.
    获得这个压缩数据文件采用的是哪种压缩算法。

    package Compress;
    
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;
    import org.apache.hadoop.io.compress.CompressionInputStream;
    import org.apache.hadoop.mapreduce.Job;
    
    /**
     * Decompresses a compressed file on the local file system, choosing the
     * codec from the file name suffix.
     *
     * <p>The output is written to the input path with the codec's default
     * extension removed (for example {@code data.gz} becomes {@code data}).
     *
     * @author liguodong
     */
    public class Decompression {

        /** Input used when no path is given on the command line. */
        final static String file = "/liguodong/data.gz";

        /**
         * Entry point.
         *
         * @param args optional; {@code args[0]} is the compressed file to expand.
         *             When absent, {@link #file} is used.
         * @throws IOException if no codec matches the file name suffix, or if
         *                     reading or writing fails
         */
        public static void main(String[] args) throws IOException {
            String source = args.length > 0 ? args[0] : file;

            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "DeCodec");
            // Original author's note: this must be called when running from a packaged jar.
            job.setJarByClass(Decompression.class);

            CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
            // The codec is chosen by file name suffix; getCodec yields null when none matches.
            CompressionCodec codec = codecFactory.getCodec(new Path(source));
            if (codec == null) {
                throw new IOException("No compression codec found for " + source);
            }

            // Output path is the input path without the codec's extension.
            String target =
                    CompressionCodecFactory.removeSuffix(source, codec.getDefaultExtension());

            // try-with-resources releases every stream even when opening a later one fails;
            // copyBytes(in, out, conf) also closes both streams once the copy is done,
            // so the extra close here is a harmless repeat on the success path.
            try (FileInputStream rawInput = new FileInputStream(new File(source));
                 CompressionInputStream inputStream = codec.createInputStream(rawInput);
                 FileOutputStream outputStream = new FileOutputStream(new File(target))) {
                IOUtils.copyBytes(inputStream, outputStream, conf);
            }
        }
    }
    

    打成jar包:Decodec.jar

    [root@master liguodong]# yarn jar Decodec.jar
    15/06/05 21:54:25 INFO zlib.ZlibFactory: Successfully loaded & initialized native-zlib library
    [root@master liguodong]# ll
    总用量 524824
    -rw-r--r-- 1 root root      1492 6月   5 19:47 codec.jar
    -rw-r--r-- 1 root root 536870912 6月   5 21:54 data
    -rw-r--r-- 1 root root    521844 6月   5 21:40 data.gz
  • 相关阅读:
    【Hibernate 5】继承映射配置及多态查询
    【Hibernate 4】一对多映射配置
    【Hibernate 3】一对一映射配置
    【ITOO 2】使用ArrayList时的注意事项:去除多余的null值
    ubuntu查看端口占用
    ubuntu安装LAMP环境
    @ResponseBody返回不能正确接收
    ubuntu apt常用命令
    ubuntu添加sudo权限
    ubuntu 13.10 skype登不上问题
  • 原文地址:https://www.cnblogs.com/claireyuancy/p/7255676.html
Copyright © 2011-2022 走看看