zoukankan      html  css  js  c++  java
  • ext2/3/4的inode结构说明

    系统环境:Ubuntu15.10/ext4

    今天在复习《鸟哥的私房菜-基础学习篇》,看到inode大小为128bytes,想看下这128字节里面到底是什么样的。

    于是我查了下google,发现ext2/3是128字节,ext4是256字节,以下是ext2/ext4对应的结构。

    ext2.h/ext2_inode:

    294 /*
    295  * Structure of an inode on the disk
    296  */
    297 struct ext2_inode {
    298         __le16  i_mode;         /* File mode */
    299         __le16  i_uid;          /* Low 16 bits of Owner Uid */
    300         __le32  i_size;         /* Size in bytes */
    301         __le32  i_atime;        /* Access time */
    302         __le32  i_ctime;        /* Creation time */
    303         __le32  i_mtime;        /* Modification time */
    304         __le32  i_dtime;        /* Deletion Time */
    305         __le16  i_gid;          /* Low 16 bits of Group Id */
    306         __le16  i_links_count;  /* Links count */
    307         __le32  i_blocks;       /* Blocks count */
    308         __le32  i_flags;        /* File flags */
    309         union {
    310                 struct {
    311                         __le32  l_i_reserved1;
    312                 } linux1;
    313                 struct {
    314                         __le32  h_i_translator;
    315                 } hurd1;
    316                 struct {
    317                         __le32  m_i_reserved1;
    318                 } masix1;
    319         } osd1;                         /* OS dependent 1 */
    320         __le32  i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
    321         __le32  i_generation;   /* File version (for NFS) */
    322         __le32  i_file_acl;     /* File ACL */
    323         __le32  i_dir_acl;      /* Directory ACL */
    324         __le32  i_faddr;        /* Fragment address */
    325         union {
    326                 struct {
    327                         __u8    l_i_frag;       /* Fragment number */
    328                         __u8    l_i_fsize;      /* Fragment size */
    329                         __u16   i_pad1;
    330                         __le16  l_i_uid_high;   /* these 2 fields    */
    331                         __le16  l_i_gid_high;   /* were reserved2[0] */
    332                         __u32   l_i_reserved2;
    333                 } linux2;
    334                 struct {
    335                         __u8    h_i_frag;       /* Fragment number */
    336                         __u8    h_i_fsize;      /* Fragment size */
    337                         __le16  h_i_mode_high;
    338                         __le16  h_i_uid_high;
    339                         __le16  h_i_gid_high;
    340                         __le32  h_i_author;
    341                 } hurd2;
    342                 struct {
    343                         __u8    m_i_frag;       /* Fragment number */
    344                         __u8    m_i_fsize;      /* Fragment size */
    345                         __u16   m_pad1;
    346                         __u32   m_i_reserved2[2];
    347                 } masix2;
    348         } osd2;                         /* OS dependent 2 */
    349 };

    ext4.h/ext4_inode:

    688 /*
    689  * Structure of an inode on the disk
    690  */
    691 struct ext4_inode {
    692         __le16  i_mode;         /* File mode */
    693         __le16  i_uid;          /* Low 16 bits of Owner Uid */
    694         __le32  i_size_lo;      /* Size in bytes */
    695         __le32  i_atime;        /* Access time */
    696         __le32  i_ctime;        /* Inode Change time */
    697         __le32  i_mtime;        /* Modification time */
    698         __le32  i_dtime;        /* Deletion Time */
    699         __le16  i_gid;          /* Low 16 bits of Group Id */
    700         __le16  i_links_count;  /* Links count */
    701         __le32  i_blocks_lo;    /* Blocks count */
    702         __le32  i_flags;        /* File flags */
    703         union {
    704                 struct {
    705                         __le32  l_i_version;
    706                 } linux1;
    707                 struct {
    708                         __u32  h_i_translator;
    709                 } hurd1;
    710                 struct {
    711                         __u32  m_i_reserved1;
    712                 } masix1;
    713         } osd1;                         /* OS dependent 1 */
    714         __le32  i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
    715         __le32  i_generation;   /* File version (for NFS) */
    716         __le32  i_file_acl_lo;  /* File ACL */
    717         __le32  i_size_high;
    718         __le32  i_obso_faddr;   /* Obsoleted fragment address */
    719         union {
    720                 struct {
    721                         __le16  l_i_blocks_high; /* were l_i_reserved1 */
    722                         __le16  l_i_file_acl_high;
    723                         __le16  l_i_uid_high;   /* these 2 fields */
    724                         __le16  l_i_gid_high;   /* were reserved2[0] */
    725                         __le16  l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
    726                         __le16  l_i_reserved;
    727                 } linux2;
    728                 struct {
    729                         __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
    730                         __u16   h_i_mode_high;
    731                         __u16   h_i_uid_high;
    732                         __u16   h_i_gid_high;
    733                         __u32   h_i_author;
    734                 } hurd2;
    735                 struct {
    736                         __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
    737                         __le16  m_i_file_acl_high;
    738                         __u32   m_i_reserved2[2];
    739                 } masix2;
    740         } osd2;                         /* OS dependent 2 */
    741         __le16  i_extra_isize;
    742         __le16  i_checksum_hi;  /* crc32c(uuid+inum+inode) BE */
    743         __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
    744         __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
    745         __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
    746         __le32  i_crtime;       /* File Creation time */
    747         __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
    748         __le32  i_version_hi;   /* high 32 bits for 64-bit version */
    749         __le32  i_projid;       /* Project ID */
    750 };

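    可以看见,ext4_inode的前128字节与ext2_inode的布局基本一致(只是部分字段被重命名或复用,例如i_size变成i_size_lo、i_dir_acl变成i_size_high、i_faddr变成i_obso_faddr),真正新增的是结构末尾从i_extra_isize开始的那一段扩展字段。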

    以下是官方介绍:

    Inode Size
    
    In ext2 and ext3, the inode structure size was fixed at 128 bytes (EXT2_GOOD_OLD_INODE_SIZE) and each inode had a disk record size of 128 bytes.
    Starting with ext4, it is possible to allocate a larger on-disk inode at format time for all inodes in the filesystem to provide space beyond
    the end of the original ext2 inode. The on-disk inode record size is recorded in the superblock as s_inode_size. The number of bytes actually
    used by struct ext4_inode beyond the original 128-byte ext2 inode is recorded in the i_extra_isize field for each inode, which allows struct
    ext4_inode to grow for a new kernel without having to upgrade all of the on-disk inodes. Access to fields beyond EXT2_GOOD_OLD_INODE_SIZE should
    be verified to be within i_extra_isize. By default, ext4 inode records are 256 bytes, and (as of October 2013) the inode structure is 156 bytes
    (i_extra_isize = 28). The extra space between the end of the inode structure and the end of the inode record can be used to store extended
    attributes. Each inode record can be as large as the filesystem block size, though this is not terribly efficient.

    官方说截至2013年10月,i_extra_isize是28字节,ext4_inode是156字节(128+28)。但我今天贴的ext4_inode代码,从i_extra_isize开始的扩展部分应该是32字节(i_extra_isize 2字节 + i_checksum_hi 2字节 + 7个4字节字段 = 2 + 2 + 4 * 7),所以ext4_inode应该是160字节(128+32)。

    为了确定,我去github克隆了linus的linux源码。

    $ git clone https://github.com/torvalds/linux.git
    $ cd linux
    $ git log -L 753,761:fs/ext4/ext4.h

    753到761就是从i_extra_isize开始到结构体末尾的那几行(行号会随内核版本变化,上面贴的版本里是741到749),可以自己查看一下位置。

    结果log输出:

    commit 8b4953e13f4c5d9a3c869f5fca7d51e1700e7db0
    Author: Theodore Ts'o <tytso@mit.edu>
    Date:   Sat Oct 17 16:15:18 2015 -0400
    
        ext4: reserve code points for the project quota feature
        
        Signed-off-by: Theodore Ts'o <tytso@mit.edu>
    
    diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
    --- a/fs/ext4/ext4.h
    +++ b/fs/ext4/ext4.h
    @@ -687,8 +690,9 @@
            __le16  i_extra_isize;
            __le16  i_checksum_hi;  /* crc32c(uuid+inum+inode) BE */
            __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
            __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
            __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
            __le32  i_crtime;       /* File Creation time */
            __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
            __le32  i_version_hi;   /* high 32 bits for 64-bit version */
    +       __le32  i_projid;       /* Project ID */
    
    commit e615391896064eb5a0c760d086b8e1c6ecfffeab
    Author: Darrick J. Wong <djwong@us.ibm.com>
    Date:   Sun Apr 29 18:23:10 2012 -0400
    
        ext4: change on-disk layout to support extended metadata checksumming
        
        Define flags and change structure definitions to allow checksumming of
        ext4 metadata.
        
        Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
        Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
    
    diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
    --- a/fs/ext4/ext4.h
    +++ b/fs/ext4/ext4.h
    @@ -668,8 +674,8 @@
            __le16  i_extra_isize;
    -       __le16  i_pad1;
    +       __le16  i_checksum_hi;  /* crc32c(uuid+inum+inode) BE */
            __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
            __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
            __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
            __le32  i_crtime;       /* File Creation time */
            __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
            __le32  i_version_hi;   /* high 32 bits for 64-bit version */
    
    commit 3dcf54515aa4981a647ad74859199032965193a5
    Author: Christoph Hellwig <hch@lst.de>
    Date:   Tue Apr 29 18:13:32 2008 -0400
    
        ext4: move headers out of include/linux
        
        Move ext4 headers out of include/linux.  This is just the trivial move,
        there's some more thing that could be done later.
        
        Signed-off-by: Christoph Hellwig <hch@lst.de>
        Signed-off-by: Mingming Cao <cmm@us.ibm.com>
        Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
    
    diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
    --- /dev/null
    +++ b/fs/ext4/ext4.h
    @@ -0,0 +376,8 @@
    +       __le16  i_extra_isize;
    +       __le16  i_pad1;
    +       __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
    +       __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
    +       __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
    +       __le32  i_crtime;       /* File Creation time */
    +       __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
    +       __le32  i_version_hi;   /* high 32 bits for 64-bit version */

    这下就清楚了,可以看见,2015年10月17号有一次提交,加了一行“__le32 i_projid; /* Project ID */”,所以多了4字节,之前2012年和2008年的代码确实都是28字节。

    回到正题,linux的ext4文件系统的inode记录默认是256字节,ext4_inode结构却只有160字节,那剩下的第160到255字节(共96字节)有啥用呢?它们可以用来存放扩展属性(extended attributes),如ACL、SELinux标签等。

    I think by default current versions of mkfs.ext2/3/4 default to 256 byte inode size (see /etc/mke2fs.conf). This IIRC enables nanosecond 
    timestamps with ext4, and as you say, more extended attributes fit within the inode. Such extended attributes are, for instance, ACL's,
    SELinux labels, some Samba specific labels.
    Bigger inodes of course waste a little bit of space, and as you make them bigger you get into diminishing returns territory pretty quickly.
    The default 256 bytes is probably a perfectly good compromise for most situations.

    参考

    ext2.h:http://lxr.free-electrons.com/source/fs/ext2/ext2.h#L297

    ext4.h:http://lxr.free-electrons.com/source/fs/ext4/ext4.h#L688

    ext4的inode介绍:https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout#Inode_Size

    ext4扩展部分使用介绍:http://serverfault.com/questions/143691/linux-why-change-inode-size

    linux源码github地址:https://github.com/torvalds/linux

  • 相关阅读:
    【关键字】
    【选择结构语句:switch】
    【数据类型】
    【接口】
    【抽象类和接口的区别】
    【访问权限】
    【内部类】
    【方法】
    【this 关键字】
    【Static】
  • 原文地址:https://www.cnblogs.com/liuxuzzz/p/5348545.html
Copyright © 2011-2022 走看看