今天想看下dd命令启动后调用的系统调用。于是就用strace跟踪了一把,考虑到dd命令是需要带参数的,所以执行一个字节的写,写10次,所在文件系统为xfs, 执行如下。
如下,注释直接加在输出的内容中了:
# strace dd if=/dev/zero of=test.log bs=1 count=10
execve("/usr/bin/dd", ["dd", "if=/dev/zero", "of=test.log", "bs=1", "count=10"], [/* 23 vars */]) = 0 //调用execve,开始执行dd
brk(0) = 0xf34000 //brk用来设置数据段(堆)的结束地址,因为参数为0,所以不做修改,只是用来取得当前结束地址。
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fd90780f000//调用mmap,来映射内存,第一个参数为NULL,所以由内核来选择要映射的地址,长度为4096即4K一页,可读写,私有,匿名映射不涉及文件和偏移。
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)//检查/etc/ld.so.preload文件是否可读,该文件用于列出需要预加载的共享库(作用类似LD_PRELOAD环境变量,但它本身是文件而不是环境变量),这里文件不存在
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3//打开/etc/ld.so.cache,其中缓存了各种动态库的名称与路径的对应关系,返回句柄为3.
fstat(3, {st_mode=S_IFREG|0644, st_size=88433, ...}) = 0//查看该文件的状态
mmap(NULL, 88433, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fd9077f9000//通过mmap映射,应该用于读取
close(3) = 0//完事后,就关闭/etc/ld.so.cache文件。
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3//打开c库
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@\34\2\0\0\0\0\0"..., 832) = 832//读取c库内容
fstat(3, {st_mode=S_IFREG|0755, st_size=2118128, ...}) = 0//查看该文件状态
mmap(NULL, 3932672, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fd907230000//映射该文件到内存,这个应该是代码段
mprotect(0x7fd9073e6000, 2097152, PROT_NONE) = 0//设置内存区域的保护,这个区域应该是映射的c 库文件中的一部分
mmap(0x7fd9075e6000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b6000) = 0x7fd9075e6000//从文件偏移0x1b6000处以可读写方式映射到固定地址,这个应该是c库的数据段(栈不会从文件映射)
mmap(0x7fd9075ec000, 16896, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fd9075ec000//又是一个匿名映射
close(3) = 0//映射完毕后,就可以关闭句柄了
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fd9077f8000//又是一个匿名映射
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fd9077f6000//又是一个匿名映射
arch_prctl(ARCH_SET_FS, 0x7fd9077f6740) = 0
mprotect(0x7fd9075e6000, 16384, PROT_READ) = 0//把前面以可读写方式映射的c库数据区(起始地址0x7fd9075e6000)的前16K改为只读,应该是重定位完成后的只读保护(RELRO),并不是代码段。
mprotect(0x610000, 4096, PROT_READ) = 0
mprotect(0x7fd907810000, 4096, PROT_READ) = 0
munmap(0x7fd9077f9000, 88433) = 0//取消/etc/ld.so.cache文件映射
rt_sigaction(SIGUSR1, NULL, {SIG_DFL, [], 0}, 8) = 0//查询SIGUSR1当前的信号处理方式(第二个参数为NULL,只读取不设置),后面第二个参数非NULL的rt_sigaction才是真正设置处理函数
rt_sigaction(SIGINT, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigaction(SIGUSR1, {0x403cd0, [INT USR1], SA_RESTORER, 0x7fd907265250}, NULL, 8) = 0
rt_sigaction(SIGINT, {0x403cc0, [INT USR1], SA_RESTORER|SA_NODEFER|SA_RESETHAND, 0x7fd907265250}, NULL, 8) = 0
brk(0) = 0xf34000//获取进程当前结束地址
brk(0xf55000) = 0xf55000//设置当前结束地址
brk(0) = 0xf55000//获取进程当前结束地址
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3//打开文件locale-archive,这个应该是系统安装的locale(区域设置)数据.
fstat(3, {st_mode=S_IFREG|0644, st_size=106070960, ...}) = 0//查看locale-archive状态
mmap(NULL, 106070960, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fd900d07000//映射locale-archive到内存
close(3) = 0//关闭locale-archive
open("/dev/zero", O_RDONLY) = 3//打开/dev/zero,这个才是我们取数据的源
dup2(3, 0) = 0//复制该文件句柄为0
close(3) = 0//关闭/dev/zero
lseek(0, 0, SEEK_CUR) = 0//获取句柄0当前的文件偏移(偏移量为0且使用SEEK_CUR,不会移动位置)
open("test.log", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 3//打开test.log,我们的目的文件
dup2(3, 1) = 1//复制test.log的句柄为1
close(3) = 0//关闭test.log
read(0, "\0", 1) = 1//调用系统调用read,从/dev/zero中读一个字节的0
write(1, "\0", 1) = 1//调用系统调用write,写一个字节的0到test.log
read(0, "\0", 1) = 1
write(1, "\0", 1) = 1
read(0, "\0", 1) = 1
write(1, "\0", 1) = 1
read(0, "\0", 1) = 1
write(1, "\0", 1) = 1
read(0, "\0", 1) = 1
write(1, "\0", 1) = 1
read(0, "\0", 1) = 1
write(1, "\0", 1) = 1
read(0, "\0", 1) = 1
write(1, "\0", 1) = 1
read(0, "\0", 1) = 1
write(1, "\0", 1) = 1
read(0, "\0", 1) = 1
write(1, "\0", 1) = 1
read(0, "\0", 1) = 1
write(1, "\0", 1) = 1
close(0) = 0//关闭/dev/zero复制出来的句柄
close(1) = 0//关闭test.log复制出来的句柄,后面部分不详述了。
open("/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 0
fstat(0, {st_mode=S_IFREG|0644, st_size=2502, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fd90780e000
read(0, "# Locale name alias data base.\n#"..., 4096) = 2502
read(0, "", 4096) = 0
close(0) = 0
munmap(0x7fd90780e000, 4096) = 0
open("/usr/share/locale/zh_CN.UTF-8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/zh_CN.utf8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/zh_CN/LC_MESSAGES/coreutils.mo", O_RDONLY) = 0
fstat(0, {st_mode=S_IFREG|0644, st_size=190751, ...}) = 0
mmap(NULL, 190751, PROT_READ, MAP_PRIVATE, 0, 0) = 0x7fd9077c7000
close(0) = 0
open("/usr/lib64/gconv/gconv-modules.cache", O_RDONLY) = 0
fstat(0, {st_mode=S_IFREG|0644, st_size=26254, ...}) = 0
mmap(NULL, 26254, PROT_READ, MAP_SHARED, 0, 0) = 0x7fd907808000
close(0) = 0
write(2, "\350\256\260\345\275\225\344\272\20610+0 \347\232\204\350\257\273\345\205\245\n\350\256\260\345\275\225\344\272"..., 48记录了10+0 的读入
记录了10+0 的写出
) = 48
write(2, "10\345\255\227\350\212\202(10 B)\345\267\262\345\244\215\345\210\266", 2310字节(10 B)已复制) = 23
write(2, "\357\274\2140.00188268 \347\247\222\357\274\2145.3 kB/\347\247\222\n", 31,0.00188268 秒,5.3 kB/秒
) = 31
close(2) = 0
exit_group(0) = ?
+++ exited with 0 +++
这里可以看到的是,dd命令会调用read和write系统调用.
另外在使用dd命令的过程中,进行监控发现,如果目的是无文件系统的块设备(裸盘),那么该盘是时刻有io;如果目的为文件系统上的文件,那么io是间歇性才有的,也就是说有使用缓存,并没有绕开pagecache。从而说明,dd命令在执行过程中的调用路径,是取决于其源和目的的。
往一个无文件系统的块设备文件dd的时候,其块层的调用栈如下,发现都是kernel函数,并无文件系统相关函数:
0xffffffff812ee1a0 : submit_bio+0x0/0x150 [kernel]
0xffffffff812327b3 : _submit_bh+0x143/0x210 [kernel]
0xffffffff81235392 : __block_write_full_page+0x162/0x370 [kernel]
0xffffffff81235757 : block_write_full_page+0xd7/0xf0 [kernel]
0xffffffff812395e8 : blkdev_writepage+0x18/0x20 [kernel]
0xffffffff8118af83 : __writepage+0x13/0x50 [kernel]
0xffffffff8118baa1 : write_cache_pages+0x251/0x4d0 [kernel]
0xffffffff8118bd6d : generic_writepages+0x4d/0x80 [kernel]
0xffffffff812395a5 : blkdev_writepages+0x35/0x40 [kernel]
0xffffffff8118ce1e : do_writepages+0x1e/0x40 [kernel]
0xffffffff81181a05 : __filemap_fdatawrite_range+0x65/0x80 [kernel]
0xffffffff81181a8d : filemap_write_and_wait+0x3d/0x80 [kernel]
0xffffffff8123a39f : __sync_blockdev+0x1f/0x40 [kernel]
0xffffffff8123a6ec : __blkdev_put+0x5c/0x1a0 [kernel]
0xffffffff8123b18e : blkdev_put+0x4e/0x140 [kernel]
0xffffffff8123b335 : blkdev_close+0x25/0x30 [kernel]
0xffffffff811ffad9 : __fput+0xe9/0x260 [kernel]
0xffffffff811ffd8e : ____fput+0xe/0x10 [kernel]
0xffffffff810accc7 : task_work_run+0xa7/0xe0 [kernel]
0xffffffff8102ab22 : do_notify_resume+0x92/0xb0 [kernel]
而往文件系统上的一个文件dd的时候,其块层的调用栈如下,存在文件系统(xfs)的相关函数,验证了我们的猜想:
0xffffffff812ee1a0 : submit_bio+0x0/0x150 [kernel]
0xffffffffa01e7d23 : xfs_submit_ioend_bio.isra.16+0x33/0x40 [xfs]
0xffffffffa01e7dfa : xfs_submit_ioend+0xca/0x130 [xfs]
0xffffffffa01e9012 : xfs_vm_writepage+0x2a2/0x5d0 [xfs]
0xffffffff8118af83 : __writepage+0x13/0x50 [kernel]
0xffffffff8118baa1 : write_cache_pages+0x251/0x4d0 [kernel]
0xffffffff8118bd6d : generic_writepages+0x4d/0x80 [kernel]
0xffffffffa01e8063 : xfs_vm_writepages+0x53/0x90 [xfs]
0xffffffff8118ce1e : do_writepages+0x1e/0x40 [kernel]
0xffffffff81181a05 : __filemap_fdatawrite_range+0x65/0x80 [kernel]
0xffffffff81181aec : filemap_flush+0x1c/0x20 [kernel]
0xffffffffa0203117 : xfs_release+0x137/0x170 [xfs]
0xffffffffa01f3275 : xfs_file_release+0x15/0x20 [xfs]
0xffffffff811ffad9 : __fput+0xe9/0x260 [kernel]
0xffffffff811ffd8e : ____fput+0xe/0x10 [kernel]
0xffffffff810accc7 : task_work_run+0xa7/0xe0 [kernel]
0xffffffff8102ab22 : do_notify_resume+0x92/0xb0 [kernel]
0xffffffff816962bd : int_signal+0x12/0x17 [kernel]