系统环境:Ubuntu15.10/ext4
今天在复习《鸟哥的私房菜-基础学习篇》,看到inode大小为128bytes,想看下这128字节里面到底是什么样的。
于是我查了下google,发现ext2/3是128字节,ext4是256字节,以下是ext2/ext4对应的结构。
ext2.h/ext2_inode:
294 /*
295  * Structure of an inode on the disk
296  */
297 struct ext2_inode {
298         __le16  i_mode;         /* File mode */
299         __le16  i_uid;          /* Low 16 bits of Owner Uid */
300         __le32  i_size;         /* Size in bytes */
301         __le32  i_atime;        /* Access time */
302         __le32  i_ctime;        /* Creation time */
303         __le32  i_mtime;        /* Modification time */
304         __le32  i_dtime;        /* Deletion Time */
305         __le16  i_gid;          /* Low 16 bits of Group Id */
306         __le16  i_links_count;  /* Links count */
307         __le32  i_blocks;       /* Blocks count */
308         __le32  i_flags;        /* File flags */
309         union {
310                 struct {
311                         __le32  l_i_reserved1;
312                 } linux1;
313                 struct {
314                         __le32  h_i_translator;
315                 } hurd1;
316                 struct {
317                         __le32  m_i_reserved1;
318                 } masix1;
319         } osd1;                         /* OS dependent 1 */
320         __le32  i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
321         __le32  i_generation;   /* File version (for NFS) */
322         __le32  i_file_acl;     /* File ACL */
323         __le32  i_dir_acl;      /* Directory ACL */
324         __le32  i_faddr;        /* Fragment address */
325         union {
326                 struct {
327                         __u8    l_i_frag;       /* Fragment number */
328                         __u8    l_i_fsize;      /* Fragment size */
329                         __u16   i_pad1;
330                         __le16  l_i_uid_high;   /* these 2 fields */
331                         __le16  l_i_gid_high;   /* were reserved2[0] */
332                         __u32   l_i_reserved2;
333                 } linux2;
334                 struct {
335                         __u8    h_i_frag;       /* Fragment number */
336                         __u8    h_i_fsize;      /* Fragment size */
337                         __le16  h_i_mode_high;
338                         __le16  h_i_uid_high;
339                         __le16  h_i_gid_high;
340                         __le32  h_i_author;
341                 } hurd2;
342                 struct {
343                         __u8    m_i_frag;       /* Fragment number */
344                         __u8    m_i_fsize;      /* Fragment size */
345                         __u16   m_pad1;
346                         __u32   m_i_reserved2[2];
347                 } masix2;
348         } osd2;                         /* OS dependent 2 */
349 };
ext4.h/ext4_inode:
688 /*
689  * Structure of an inode on the disk
690  */
691 struct ext4_inode {
692         __le16  i_mode;         /* File mode */
693         __le16  i_uid;          /* Low 16 bits of Owner Uid */
694         __le32  i_size_lo;      /* Size in bytes */
695         __le32  i_atime;        /* Access time */
696         __le32  i_ctime;        /* Inode Change time */
697         __le32  i_mtime;        /* Modification time */
698         __le32  i_dtime;        /* Deletion Time */
699         __le16  i_gid;          /* Low 16 bits of Group Id */
700         __le16  i_links_count;  /* Links count */
701         __le32  i_blocks_lo;    /* Blocks count */
702         __le32  i_flags;        /* File flags */
703         union {
704                 struct {
705                         __le32  l_i_version;
706                 } linux1;
707                 struct {
708                         __u32  h_i_translator;
709                 } hurd1;
710                 struct {
711                         __u32  m_i_reserved1;
712                 } masix1;
713         } osd1;                         /* OS dependent 1 */
714         __le32  i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
715         __le32  i_generation;   /* File version (for NFS) */
716         __le32  i_file_acl_lo;  /* File ACL */
717         __le32  i_size_high;
718         __le32  i_obso_faddr;   /* Obsoleted fragment address */
719         union {
720                 struct {
721                         __le16  l_i_blocks_high; /* were l_i_reserved1 */
722                         __le16  l_i_file_acl_high;
723                         __le16  l_i_uid_high;   /* these 2 fields */
724                         __le16  l_i_gid_high;   /* were reserved2[0] */
725                         __le16  l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
726                         __le16  l_i_reserved;
727                 } linux2;
728                 struct {
729                         __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
730                         __u16   h_i_mode_high;
731                         __u16   h_i_uid_high;
732                         __u16   h_i_gid_high;
733                         __u32   h_i_author;
734                 } hurd2;
735                 struct {
736                         __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
737                         __le16  m_i_file_acl_high;
738                         __u32   m_i_reserved2[2];
739                 } masix2;
740         } osd2;                         /* OS dependent 2 */
741         __le16  i_extra_isize;
742         __le16  i_checksum_hi;  /* crc32c(uuid+inum+inode) BE */
743         __le32  i_ctime_extra;  /* extra Change time (nsec << 2 | epoch) */
744         __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
745         __le32  i_atime_extra;  /* extra Access time (nsec << 2 | epoch) */
746         __le32  i_crtime;       /* File Creation time */
747         __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
748         __le32  i_version_hi;   /* high 32 bits for 64-bit version */
749         __le32  i_projid;       /* Project ID */
750 };
可以看见,ext4_inode的前128字节与ext2_inode的布局基本一致(只是部分字段改了名字或用途,如i_size_lo、i_blocks_lo、i_size_high),主要区别是末尾从i_extra_isize开始多出了一段。
以下是官方介绍:
Inode Size In ext2 and ext3, the inode structure size was fixed at 128 bytes (EXT2_GOOD_OLD_INODE_SIZE) and each inode had a disk record size of 128 bytes.
Starting with ext4, it is possible to allocate a larger on-disk inode at format time for all inodes in the filesystem to provide space beyond
the end of the original ext2 inode. The on-disk inode record size is recorded in the superblock as s_inode_size. The number of bytes actually
used by struct ext4_inode beyond the original 128-byte ext2 inode is recorded in the i_extra_isize field for each inode, which allows struct
ext4_inode to grow for a new kernel without having to upgrade all of the on-disk inodes. Access to fields beyond EXT2_GOOD_OLD_INODE_SIZE should
be verified to be within i_extra_isize. By default, ext4 inode records are 256 bytes, and (as of October 2013) the inode structure is 156 bytes
(i_extra_isize = 28). The extra space between the end of the inode structure and the end of the inode record can be used to store extended
attributes. Each inode record can be as large as the filesystem block size, though this is not terribly efficient.
官方说截至2013年10月,i_extra_isize是28字节,ext4_inode是156字节(128+28)。但我今天贴的ext4_inode代码,从i_extra_isize开始的扩展部分应该
是32字节(i_extra_isize 2字节 + i_checksum_hi 2字节 + 7个4字节字段,即 2 + 2 + 4 * 7 = 32),所以ext4_inode应该是160字节(128+32)。
为了确定,我去github克隆了linus的linux源码。
$ git clone https://github.com/torvalds/linux.git
$ cd linux
$ git log -L 753,761:fs/ext4/ext4.h
753到761行就是fs/ext4/ext4.h中从i_extra_isize开始的那几行(行号会随内核版本变化),可以自己查看一下具体位置。
结果log输出:
commit 8b4953e13f4c5d9a3c869f5fca7d51e1700e7db0 Author: Theodore Ts'o <tytso@mit.edu> Date: Sat Oct 17 16:15:18 2015 -0400 ext4: reserve code points for the project quota feature Signed-off-by: Theodore Ts'o <tytso@mit.edu> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -687,8 +690,9 @@ __le16 i_extra_isize; __le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ __le32 i_crtime; /* File Creation time */ __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ __le32 i_version_hi; /* high 32 bits for 64-bit version */ + __le32 i_projid; /* Project ID */ commit e615391896064eb5a0c760d086b8e1c6ecfffeab Author: Darrick J. Wong <djwong@us.ibm.com> Date: Sun Apr 29 18:23:10 2012 -0400 ext4: change on-disk layout to support extended metadata checksumming Define flags and change structure definitions to allow checksumming of ext4 metadata. Signed-off-by: Darrick J. Wong <djwong@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -668,8 +674,8 @@ __le16 i_extra_isize; - __le16 i_pad1; + __le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ __le32 i_crtime; /* File Creation time */ __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ __le32 i_version_hi; /* high 32 bits for 64-bit version */ commit 3dcf54515aa4981a647ad74859199032965193a5 Author: Christoph Hellwig <hch@lst.de> Date: Tue Apr 29 18:13:32 2008 -0400 ext4: move headers out of include/linux Move ext4 headers out of include/linux. 
This is just the trivial move, there's some more thing that could be done later. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h --- /dev/null +++ b/fs/ext4/ext4.h @@ -0,0 +376,8 @@ + __le16 i_extra_isize; + __le16 i_pad1; + __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ + __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ + __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ + __le32 i_crtime; /* File Creation time */ + __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ + __le32 i_version_hi; /* high 32 bits for 64-bit version */
这下就清楚了,可以看见,2015年10月17号有一次提交,加了一行“__le32 i_projid; /* Project ID */”,所以多了4字节,之前2012年和2008年的代码确实都是28字节。
回到正题,Linux的ext4文件系统的inode记录默认是256字节,而ext4_inode结构却只有160字节,那剩下的第160到255字节有什么用呢?它们可以用来存放扩展属性(extended attributes),如ACL、SELinux的标签等。
I think by default current versions of mkfs.ext2/3/4 default to 256 byte inode size (see /etc/mke2fs.conf). This IIRC enables nanosecond
timestamps with ext4, and as you say, more extended attributes fit within the inode. Such extended attributes are, for instance, ACL's,
SELinux labels, some Samba specific labels. Bigger inodes of course waste a little bit of space, and as you make them bigger you get into diminishing returns territory pretty quickly.
The default 256 bytes is probably a perfectly good compromise for most situations.
参考:
ext2.h:http://lxr.free-electrons.com/source/fs/ext2/ext2.h#L297
ext4.h:http://lxr.free-electrons.com/source/fs/ext4/ext4.h#L688
ext4的inode介绍:https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout#Inode_Size
ext4扩展部分使用介绍:http://serverfault.com/questions/143691/linux-why-change-inode-size
linux源码github地址:https://github.com/torvalds/linux