# 概述

本篇博客将仔细分析如何从格式化为 ext2 文件系统的磁盘中读取超级块，并据此填充内存中的超级块结构。每次把一个格式化为 ext2 文件系统的磁盘（分区）挂载到挂载点时，内核都会执行这一过程，其实现主要位于函数 ext2_fill_super 中。

# 实现

在 ext2 系列之前的博客中我们已经描述了 ext2 的磁盘划分，所以读取超级块的过程本身比较简单，只是在读取完成后可能需要进行一系列的检查。废话不多说，我们直接来看该函数的实现，并分为几段来阐述其实现机理。

 1 static int ext2_fill_super(struct super_block *sb, void *data, int silent)
2 {
3     struct buffer_head * bh;
4     struct ext2_sb_info * sbi;
5     struct ext2_super_block * es;
6     struct inode *root;
7     unsigned long block;
8     unsigned long sb_block = get_sb_block(&data);
9     unsigned long logic_sb_block;
10     unsigned long offset = 0;
11     unsigned long def_mount_opts;
12     long ret = -EINVAL;
13     //default block size is 1024B
14     int blocksize = BLOCK_SIZE;
15     int db_count;
16     int i, j;
17     __le32 features;
18     int err;
19
20     //allocate memory ext2_super_block in memory
21     sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
22     if (!sbi)
23         return -ENOMEM;
24
25     sbi->s_blockgroup_lock =
26         kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
27     if (!sbi->s_blockgroup_lock) {
28         kfree(sbi);
29         return -ENOMEM;
30     }
31     //sb is vfs super_block
32     //sb->s_fs_info is specific file system super block
33     sb->s_fs_info = sbi;
34     sbi->s_sb_block = sb_block;
35
36     spin_lock_init(&sbi->s_lock);
37
38     /*
39      * See what the current blocksize for the device is, and
40      * use that as the blocksize.  Otherwise (or if the blocksize
41      * is smaller than the default) use the default.
42      * This is important for devices that have a hardware
43      * sectorsize that is larger than the default.
44      */
45      //the block size can't be smaller than BLOCK_SIZE=1024B
46      //and block size must be smaller than PAGE_SIZE = 4096B now
47     blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
48     if (!blocksize) {
49         ext2_msg(sb, KERN_ERR, "error: unable to set blocksize");
50         goto failed_sbi;
51     }
52
53     /*
54      * If the superblock doesn't start on a hardware sector boundary,
55      * calculate the offset.
56      */
57      //blocksize may bigger than BLOCK_SIZE=1024B
58      //because we read blocksize bytes data from disk
59      //Block 0 is 1024B and super_block is also 1024B
60      //if blocksize is not 1024B,it must be bigger than 1024B,for example,if blocksize is 2048B
61      //we must read block 0(first 2048B on disk),then we read offset 1024~2047 as super block
62     if (blocksize != BLOCK_SIZE) {
63         logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
64         offset = (sb_block*BLOCK_SIZE) % blocksize;
65     } else {
66         logic_sb_block = sb_block;
67     }
68     //read block @logic_sb_block containg super block
69     if (!(bh = sb_bread(sb, logic_sb_block))) {
70         ext2_msg(sb, KERN_ERR, "error: unable to read superblock");
71         goto failed_sbi;
72     }
73     /*
74      * Note: s_es must be initialized as soon as possible because
75      *       some ext2 macro-instructions depend on its value
76      */
77     es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
78     //sbi is ext2_super_block in memory while sbi->s_es is ext2_super_block on disk
79     sbi->s_es = es;

1. 超级块的起始位置在哪？
2. 超级块的大小是多少？
3. 在实现中我们自己定义的块大小（默认1024）与磁盘设备的块大小如果不一致怎么办？

ext2_fill_super所展示的第一段代码所做工作主要有：
1. 分配ext2内存超级块结构struct ext2_sb_info，如果分配内存失败，则直接返回-ENOMEM;
2. 确定逻辑块大小：比较默认逻辑块大小（1024 字节）和设备的真实逻辑块大小（根据磁盘设备的信息确定），取较大者作为逻辑块大小。注意：该值必须是 2 的整数次幂且不可大于 4096。
3. 从磁盘上读出超级块：根据步骤 2 中确定的块大小计算超级块所在的逻辑块号和块内偏移，读出该块后，把指向磁盘超级块内容的指针保存到步骤 1 分配的内存超级块结构中（sbi->s_es = es）。

上文描述的第一阶段从磁盘上读出了超级块内容，接下来我们就要根据磁盘上的超级块结构来初始化内存超级块结构，在这个过程中可能还伴随着磁盘超级块内容的检查，确认其是否已经损坏等。
 1 sb->s_magic = le16_to_cpu(es->s_magic);
2
3     if (sb->s_magic != EXT2_SUPER_MAGIC)
4         goto cantfind_ext2;
5
6     /* Set defaults before we parse the mount options */
7     /* 接下来这段根据磁盘超级块
8     ** 结构来设置内存超级块结构的部分选项
9     ** 相比较而言这些选项的重要性没那么高
10     */
11     def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
12     if (def_mount_opts & EXT2_DEFM_DEBUG)
13         set_opt(sbi->s_mount_opt, DEBUG);
14     if (def_mount_opts & EXT2_DEFM_BSDGROUPS)
15         set_opt(sbi->s_mount_opt, GRPID);
16     if (def_mount_opts & EXT2_DEFM_UID16)
17         set_opt(sbi->s_mount_opt, NO_UID32);
18 #ifdef CONFIG_EXT2_FS_XATTR
19     if (def_mount_opts & EXT2_DEFM_XATTR_USER)
20         set_opt(sbi->s_mount_opt, XATTR_USER);
21 #endif
22 #ifdef CONFIG_EXT2_FS_POSIX_ACL
23     if (def_mount_opts & EXT2_DEFM_ACL)
24         set_opt(sbi->s_mount_opt, POSIX_ACL);
25 #endif
26     /* 这个选项决定了文件系统检测到错误时的处理方法
27     ** 如PANIC即指示出错就崩溃...
28     */
29     if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC)
30         set_opt(sbi->s_mount_opt, ERRORS_PANIC);
31     else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_CONTINUE)
32         set_opt(sbi->s_mount_opt, ERRORS_CONT);
33     else
34         set_opt(sbi->s_mount_opt, ERRORS_RO);
35
36     sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
37     sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
38
39     set_opt(sbi->s_mount_opt, RESERVATION);
40
41     if (!parse_options((char *) data, sb))
42         goto failed_mount;
43
44     sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
45         ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
46          MS_POSIXACL : 0);
47
48     ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
49                     EXT2_MOUNT_XIP if not */
50
51     if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
52         (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
53          EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
54          EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U)))
55         ext2_msg(sb, KERN_WARNING,
56             "warning: feature flags set on rev 0 fs, "
57             "running e2fsck is recommended");
58     /*
59      * Check feature flags regardless of the revision level, since we
60      * previously didn't change the revision level when setting the flags,
61      * so there is a chance incompat flags are set on a rev 0 filesystem.
62      */
63     features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP);
64     if (features) {
65         ext2_msg(sb, KERN_ERR,    "error: couldn't mount because of "
66                "unsupported optional features (%x)",
67             le32_to_cpu(features));
68         goto failed_mount;
69     }
70     if (!(sb->s_flags & MS_RDONLY) &&
71         (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
72         ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of "
73                "unsupported optional features (%x)",
74                le32_to_cpu(features));
75         goto failed_mount;
76     }

第二阶段初始化的只是内存超级块中一些比较简单的选项；到了第三阶段，初始化的内容就重要得多了，它们关系到文件系统的正确性，因此我们作比较详细的分析。
/*
** 超级块中可能记录着逻辑块大小，因此我们必须
** 以此为准
*/
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);

if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) {
if (!silent)
ext2_msg(sb, KERN_ERR,
"error: unsupported blocksize for xip");
goto failed_mount;
}

/* If the blocksize doesn't match, re-read the thing.. */
/* 如果块大小和我们之前确定的不太一样
** 我们有必要重新读一次超级块
** 因为之前读的可能并不准确
*/
if (sb->s_blocksize != blocksize) {
brelse(bh);

if (!sb_set_blocksize(sb, blocksize)) {
ext2_msg(sb, KERN_ERR, "error: blocksize is too small");
goto failed_sbi;
}

logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
offset = (sb_block*BLOCK_SIZE) % blocksize;
bh = sb_bread(sb, logic_sb_block);
if(!bh) {
ext2_msg(sb, KERN_ERR, "error: couldn't read"
"superblock on 2nd try");
goto failed_sbi;
}
es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
sbi->s_es = es;
if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) {
ext2_msg(sb, KERN_ERR, "error: magic mismatch");
goto failed_mount;
}
}

/* 计算ext2最大可支持文件的大小*/
sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits);

if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) {
sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
sbi->s_first_ino = EXT2_GOOD_OLD_FIRST_INO;
} else {
sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) ||
!is_power_of_2(sbi->s_inode_size) ||
(sbi->s_inode_size > blocksize)) {
ext2_msg(sb, KERN_ERR,
"error: unsupported inode size: %d",
sbi->s_inode_size);
goto failed_mount;
}
}

/*  对于逻辑块较大的ext2文件系统，为了
**  减少块内碎片问题，设置了fragment，
**  即每个磁盘块内可再细分成多个fragment
**  这个思想源自FFS，对于1024大小的磁盘块
**  也就没有必要再划分fragment了
**  因为最小的fragment大小就是1024字节
*/
sbi->s_frag_size = EXT2_MIN_FRAG_SIZE <<
le32_to_cpu(es->s_log_frag_size);
if (sbi->s_frag_size == 0)
goto cantfind_ext2;
/* 初始化一些静态信息*/
sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size;

sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);

if (EXT2_INODE_SIZE(sb) == 0)
goto cantfind_ext2;
sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0)
goto cantfind_ext2;
sbi->s_itb_per_group = sbi->s_inodes_per_group /
sbi->s_inodes_per_block;
sbi->s_desc_per_block = sb->s_blocksize /
sizeof (struct ext2_group_desc);
sbi->s_sbh = bh;
sbi->s_mount_state = le16_to_cpu(es->s_state);
sbi->s_desc_per_block_bits =
ilog2 (EXT2_DESC_PER_BLOCK(sb));

if (sb->s_magic != EXT2_SUPER_MAGIC)
goto cantfind_ext2;

if (sb->s_blocksize != bh->b_size) {
if (!silent)
ext2_msg(sb, KERN_ERR, "error: unsupported blocksize");
goto failed_mount;
}

/* 目前仅支持块大小和fragment size大小相同*/
if (sb->s_blocksize != sbi->s_frag_size) {
ext2_msg(sb, KERN_ERR,
"error: fragsize %lu != blocksize %lu"
"(not supported yet)",
sbi->s_frag_size, sb->s_blocksize);
goto failed_mount;
}

if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
"error: #blocks per group too big: %lu",
sbi->s_blocks_per_group);
goto failed_mount;
}
if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
"error: #fragments per group too big: %lu",
sbi->s_frags_per_group);
goto failed_mount;
}
if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
"error: #inodes per group too big: %lu",
sbi->s_inodes_per_group);
goto failed_mount;
}

if (EXT2_BLOCKS_PER_GROUP(sb) == 0)
goto cantfind_ext2;
sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
le32_to_cpu(es->s_first_data_block) - 1)
/ EXT2_BLOCKS_PER_GROUP(sb)) + 1;
db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
EXT2_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
if (sbi->s_group_desc == NULL) {
ext2_msg(sb, KERN_ERR, "error: not enough memory");
goto failed_mount;
}
bgl_lock_init(sbi->s_blockgroup_lock);
/* 这个数据结构干嘛的现在还不得而知*/
sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL);
if (!sbi->s_debts) {
ext2_msg(sb, KERN_ERR, "error: not enough memory");
goto failed_mount_group_desc;
}

/* 读出块组描述符信息 */
for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logic_sb_block, i);
sbi->s_group_desc[i] = sb_bread(sb, block);
if (!sbi->s_group_desc[i]) {
for (j = 0; j < i; j++)
brelse (sbi->s_group_desc[j]);
ext2_msg(sb, KERN_ERR,
"error: unable to read group descriptors");
goto failed_mount_group_desc;
}
}
if (!ext2_check_descriptors (sb)) {
ext2_msg(sb, KERN_ERR, "group descriptors corrupted");
goto failed_mount2;
}
sbi->s_gdb_count = db_count;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));

spin_lock_init(&sbi->s_next_gen_lock);

/* per fileystem reservation list head & lock */
//init something for reservation windows of every file
spin_lock_init(&sbi->s_rsv_window_lock);
sbi->s_rsv_window_root = RB_ROOT;
/*
* Add a single, static dummy reservation to the start of the
* reservation window list --- it gives us a placeholder for
* append-at-start-of-list which makes the allocation logic
* _much_ simpler.
*/
/* 初始化内存超级块的预留窗口
** 所谓的预留窗口是在分配数据块的时候
** 每一次多分配一点，以提高文件数据存储
** 的连续性
*/

err = percpu_counter_init(&sbi->s_freeblocks_counter,
ext2_count_free_blocks(sb));
if (!err) {
err = percpu_counter_init(&sbi->s_freeinodes_counter,
ext2_count_free_inodes(sb));
}
if (!err) {
err = percpu_counter_init(&sbi->s_dirs_counter,
ext2_count_dirs(sb));
}
if (err) {
ext2_msg(sb, KERN_ERR, "error: insufficient memory");
goto failed_mount3;
}
/*
* set up enough so that it can read an inode
*/
sb->s_op = &ext2_sops;
sb->s_export_op = &ext2_export_ops;
sb->s_xattr = ext2_xattr_handlers;

#ifdef CONFIG_QUOTA
sb->dq_op = &dquot_operations;
sb->s_qcop = &dquot_quotactl_ops;
#endif

当超级块完全读出并构造内存超级块以后，接下来就是构造根目录了，让我们直接看代码：
/* 读根目录的inode，inode号为默认值2
** 读出后保存在内存inode结构中
*/
root = ext2_iget(sb, EXT2_ROOT_INO);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto failed_mount3;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
iput(root);
ext2_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
goto failed_mount3;
}

/* 分配根目录的内存目录项
** 因为根目录没有父目录这个概念
** 因此，没法从其父目录中读出其目录
** 只能在内存中构造一个
*/
sb->s_root = d_alloc_root(root);
if (!sb->s_root) {
iput(root);
ext2_msg(sb, KERN_ERR, "error: get root inode failed");
ret = -ENOMEM;
goto failed_mount3;
}
if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
ext2_msg(sb, KERN_WARNING,
"warning: mounting ext3 filesystem as ext2");
if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY))
sb->s_flags |= MS_RDONLY;

/* 在填充超级块时有可能会修改磁盘超级块
** 因此有必要作一次写回操作
*/
ext2_write_super(sb);
return 0;

|
2月前
|

xv6(10) 文件系统：磁盘&缓冲区

58 0
|
11月前
7.1.5 EXT2/EXT3/EXT4 文件的存取与日志式文件系统的功能
7.1.5 EXT2/EXT3/EXT4 文件的存取与日志式文件系统的功能
80 0

2238 0
|

|
Oracle 关系型数据库 Unix