zoukankan      html  css  js  c++  java
  • hadoop FileSplit

    /**
     * A section of an input file. Returned by {@link
     * InputFormat#getSplits(JobContext)} and passed to
     * {@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}.
     *
     * <p>Extends {@link InputSplit} and implements {@link Writable}. Only the
     * path, start offset and length are written by {@link #write(DataOutput)};
     * host and location information is not serialized and is cleared by
     * {@link #readFields(DataInput)}.
     */
    @InterfaceAudience.Public
    @InterfaceStability.Stable
    public class FileSplit extends InputSplit implements Writable {
      private Path file;
      private long start;
      private long length;
      private String[] hosts;
      private SplitLocationInfo[] hostInfos;

      /** Creates an empty split; fields are populated by {@link #readFields(DataInput)}. */
      public FileSplit() {}

      /**
       * Constructs a split with host information.
       *
       * @param file the file name
       * @param start the position of the first byte in the file to process
       * @param length the number of bytes in the file to process
       * @param hosts the list of hosts containing the block, possibly null
       */
      public FileSplit(Path file, long start, long length, String[] hosts) {
        this.file = file;
        this.start = start;
        this.length = length;
        this.hosts = hosts;
      }

      /**
       * Constructs a split with host and cached-blocks information.
       *
       * <p>When {@code hosts} is null no location info is built and
       * {@link #getLocationInfo()} returns null. When {@code inMemoryHosts} is
       * null every host is recorded as not holding the block in memory.
       *
       * @param file the file name
       * @param start the position of the first byte in the file to process
       * @param length the number of bytes in the file to process
       * @param hosts the list of hosts containing the block, possibly null
       * @param inMemoryHosts the list of hosts containing the block in memory,
       *     possibly null
       */
      public FileSplit(Path file, long start, long length, String[] hosts,
          String[] inMemoryHosts) {
        this(file, start, length, hosts);
        if (hosts == null) {
          // Nothing to describe: leave hostInfos null instead of failing on hosts.length.
          return;
        }
        hostInfos = new SplitLocationInfo[hosts.length];
        for (int i = 0; i < hosts.length; i++) {
          hostInfos[i] =
              new SplitLocationInfo(hosts[i], isInMemoryHost(inMemoryHosts, hosts[i]));
        }
      }

      /** Returns true when {@code host} appears in {@code inMemoryHosts}; a null array matches nothing. */
      private static boolean isInMemoryHost(String[] inMemoryHosts, String host) {
        if (inMemoryHosts == null) {
          return false;
        }
        // because N will be tiny, scanning is probably faster than a HashSet
        for (String inMemoryHost : inMemoryHosts) {
          if (inMemoryHost.equals(host)) {
            return true;
          }
        }
        return false;
      }

      /** The file containing this split's data. */
      public Path getPath() {
        return file;
      }

      /** The position of the first byte in the file to process. */
      public long getStart() {
        return start;
      }

      /** The number of bytes in the file to process. */
      @Override
      public long getLength() {
        return length;
      }

      /** Formats the split as {@code <file>:<start>+<length>}. */
      @Override
      public String toString() {
        return file + ":" + start + "+" + length;
      }

      ////////////////////////////////////////////
      // Writable methods
      ////////////////////////////////////////////

      /**
       * Writes the file path (as a string), the start offset and the length.
       * Host and location information is not written.
       *
       * @param out the output to write to
       * @throws IOException if writing to {@code out} fails
       */
      @Override
      public void write(DataOutput out) throws IOException {
        Text.writeString(out, file.toString());
        out.writeLong(start);
        out.writeLong(length);
      }

      /**
       * Reads the file path, start offset and length, in the order
       * {@link #write(DataOutput)} emits them, and clears all host information.
       *
       * @param in the input to read from
       * @throws IOException if reading from {@code in} fails
       */
      @Override
      public void readFields(DataInput in) throws IOException {
        file = new Path(Text.readString(in));
        start = in.readLong();
        length = in.readLong();
        hosts = null;
        // Location info is not serialized either; drop it so an instance that is
        // read into again does not keep reporting the previous split's hosts.
        hostInfos = null;
      }

      /**
       * Returns the hosts given at construction, or an empty array when none
       * are known (including after {@link #readFields(DataInput)}).
       */
      @Override
      public String[] getLocations() throws IOException {
        if (this.hosts == null) {
          return new String[]{};
        } else {
          return this.hosts;
        }
      }

      /**
       * Returns the per-host location info built by the five-argument
       * constructor, or null when none was built or it has been cleared.
       */
      @Override
      @Evolving
      public SplitLocationInfo[] getLocationInfo() throws IOException {
        return hostInfos;
      }
    }
  • 相关阅读:
    unix/linux中如何在vi编辑器中方便的跳转到首行和末行?
    如何在Ubuntu中用firefox浏览器查看chm文档?
    sybase数据库技术 :游标可更新与for read only/for update
    PropertyMetadata和UIPropertyMetadata的一点区别
    wpf,离线状态下部分功能不可用。
    C#操作注册服务卸载服务启动服务停止服务.. .
    ContentControl与ContentPresenter区别?
    wpf telerik中的book控件
    C#写入和读出文本文件
    WPF 点击Calendar后,需要点击两次按钮
  • 原文地址:https://www.cnblogs.com/fantiantian/p/9340239.html
Copyright © 2011-2022 走看看