【Linux】文件操作、文件描述符和重定向（下）-阿里云开发者社区

read

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <assert.h>
#include <unistd.h>
#define FILE_NAME "log.txt"
// Read the beginning of FILE_NAME with the read(2) system call and echo it to
// stdout. Returns 0 on success, 1 if the file cannot be opened or read.
int main()
{
    umask(0);   // only affects files created by this process; kept for parity with the write demo
    int fd = open(FILE_NAME, O_RDONLY);
    if(fd < 0)
    {
        perror("open");
        return 1;
    }
    char buffer[1024];
    // read() does not NUL-terminate, so one byte is held back for the terminator.
    ssize_t num = read(fd, buffer, sizeof(buffer) - 1);
    if(num < 0)
    {
        perror("read");
        close(fd);
        return 1;
    }
    // num == 0 (end of file) terminates at index 0, giving an empty string, so
    // printf never walks over uninitialized stack bytes.
    buffer[num] = '\0';
    printf("%s", buffer);
    close(fd);
    return 0;
}

库函数与系统调用的关系

👉文件的深入理解👈

文件描述符

在前面已经提到过：文件操作的本质就是进程和被打开文件的关系。进程是可以打开多个文件的，那么系统中一定会存在大量被打开的文件。这些被打开的文件，就要被操作系统管理起来。管理的本质是先描述再组织。操作系统为了管理对应的打开文件，必定要为文件创建对应的内核数据结构来表示文件，而这个内核数据结构就是struct file，其包含了文件的大部分属性。注：struct file和 C 语言的FILE不是一样的东西。

那接下来，我们就来学习进程是如何和被打开文件关联起来的！

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <assert.h>
#include <unistd.h>
// '#' turns the macro argument into a string literal; the compiler then joins
// adjacent literals, so FILE_NAME(0) expands to "log.txt" "0", i.e. "log.txt0".
#define FILE_NAME(number) "log.txt"#number
// Open five files back to back and print the descriptor returned for each one,
// to show how descriptors are handed out. Returns 0 if every open succeeded,
// 1 otherwise (the values are still printed, -1 marking a failed open).
int main()
{
    umask(0);   // final permission bits = requested mode & ~umask
    const char* names[] = {
        FILE_NAME(0), FILE_NAME(1), FILE_NAME(2), FILE_NAME(3), FILE_NAME(4)
    };
    enum { COUNT = sizeof(names) / sizeof(names[0]) };
    int fds[COUNT];
    int status = 0;
    for(int i = 0; i < COUNT; ++i)
    {
        fds[i] = open(names[i], O_WRONLY | O_CREAT | O_APPEND, 0666);
        if(fds[i] < 0)
        {
            perror(names[i]);
            status = 1;
        }
    }
    for(int i = 0; i < COUNT; ++i)
    {
        printf("fd%d:%d\n", i, fds[i]);
    }
    for(int i = 0; i < COUNT; ++i)
    {
        // Only descriptors that were actually obtained are closed.
        if(fds[i] >= 0)
        {
            close(fds[i]);
        }
    }
    return status;
}

看到上面连续的小整数，我想大家肯定能够想到数组的下标，那么我们可以猜测文件描述符可能与数组有关。那为什么是从 3 开始的呢？0、1、2 去哪了？在学习 C 语言的时候，我们学到过 C 语言程序会默认打开三个流：stdin（标准输入流：键盘）、stdout（标准输出流：显示器）和 stderr（标准错误流：显示器）。这三个流的类型都是FILE*，而FILE是结构体。C 语言进行文件操作使用的是FILE*，而操作系统使用的是文件描述符fd，那么结构体FILE中肯定包含文件描述符fd。所以 0、1、2 就被这三个流使用了。

写个程序来验证上面的说法

理解文件描述符的本质

文件描述符的本质是进程的文件描述符表的下标，也就是数组下标！！！进程与被打开文件的关系：进程通过文件描述符表指向对应的被打开的文件。

文件描述符的分配规则

按顺序从小到大查找文件描述符表，最小的且没有被占用的 fd 就会分配给被打开的文件。

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
// Create (or truncate) log.txt and print the descriptor open() returned.
// Returns 0 on success, 1 if the file cannot be opened.
int main()
{
    umask(0);
    int fd = open("log.txt", O_WRONLY | O_CREAT | O_TRUNC, 0666);
    // Explicit check instead of assert(): assert is compiled out under NDEBUG,
    // which would let a failed open go unnoticed.
    if(fd < 0)
    {
        perror("open");
        return 1;
    }
    printf("fd:%d\n", fd);
    // Push the text out before closing: if descriptor 1 was closed beforehand,
    // fd is 1 and the buffered output would otherwise be lost with it.
    fflush(stdout);
    close(fd);
    return 0;
}

关闭 0

关闭 2

关闭 0 和 2

重定向

那如果我们只把 1 关掉会怎么样呢？

将程序运行起来，我们可以发现并没有向显示器上打印信息。原因也非常的简单，因为我们把标准输出（显示器）给关掉了。又因为 printf 函数是向 stdout 上打印的，stdout 的文件描述符为 1，而当前 1 号文件描述符指向的是我们自己创建的文件，所以数据就被打印到了该文件中了。注：需要刷新 stdout 才能看到信息。

如果我们没有关掉 1，数据就会被打印到显示器上；而如果我们关掉了 1，数据就被打印到了文件里。那么这种现象就叫做重定向。常见的重定向：输出重定向>、追加重定向>>和输入重定向<。重定向的本质是：上层使用的 fd 不变，在内核中更改 fd 对应的struct file*的地址。

如果重定向先要关闭 1，才能进行重定向的话，这就有点挫了。系统为了支持我们更好地进行重定向，给我们提供了一个系统调用dup2。

请简述重定向的实现原理：

每个文件描述符都是一个内核中文件描述信息数组的下标，对应有一个文件的描述信息用于操作文件，而重定向就是在不改变所操作的文件描述符的情况下，通过改变描述符对应的文件描述信息进而实现改变所操作的文件。

1. 输出重定向

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
// Output redirection: everything written to stdout ends up in log.txt
// (truncated first). Returns 0 on success, 1 if open or dup2 fails.
int main()
{
    umask(0);
    int fd = open("log.txt", O_WRONLY | O_CREAT | O_TRUNC, 0666);
    if(fd < 0)
    {
        perror("open");
        return 1;
    }
    // dup2(fd, 1) makes descriptor 1 refer to the same open file as fd.
    if(dup2(fd, 1) < 0)
    {
        perror("dup2");
        close(fd);
        return 1;
    }
    printf("open fd:%d\n", fd); // printf -> stdout
    fprintf(stdout, "open fd:%d\n", fd);   // fprintf -> stdout
    fflush(stdout); // hand the buffered text to the kernel before closing
    close(fd);
    return 0;
}

2. 追加重定向

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <string.h>
// Append redirection: stdout is pointed at log.txt opened with O_APPEND, then
// written through both stdio and the raw write(2) call.
// Returns 0 on success, 1 if open or dup2 fails.
int main()
{
    umask(0);
    int fd = open("log.txt", O_WRONLY | O_CREAT | O_APPEND, 0666);
    if(fd < 0)
    {
        perror("open");
        return 1;
    }
    // dup2(fd, 1) makes descriptor 1 refer to the same open file as fd.
    if(dup2(fd, 1) < 0)
    {
        perror("dup2");
        close(fd);
        return 1;
    }
    printf("open fd:%d\n", fd); // printf -> stdout
    fprintf(stdout, "open fd:%d\n", fd);   // fprintf -> stdout
    // The two lines above are still sitting in the stdio buffer, while write()
    // bypasses that buffer. Flushing first keeps the file in source order.
    fflush(stdout);
    const char* msg = "It's Crazy Thursday. Give me 50 yuan\n";
    if(write(1, msg, strlen(msg)) < 0)
    {
        perror("write");
    }
    close(fd);
    return 0;
}

3. 输入重定向

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <string.h>
// Input redirection: stdin is pointed at log.txt, then the file is echoed line
// by line, each read preceded by a "< " prompt.
// Returns 0 on success, 1 if open or dup2 fails.
int main()
{
    int fd = open("log.txt", O_RDONLY);
    if(fd < 0)
    {
        perror("open");
        return 1;
    }
    // dup2(fd, 0) makes descriptor 0 refer to the same open file as fd.
    if(dup2(fd, 0) < 0)
    {
        perror("dup2");
        close(fd);
        return 1;
    }
    char line[64];
    while(1)
    {
        printf("< ");
        // fgets already reserves room for the terminating NUL, so the full
        // buffer size is passed. NULL means end of input (or a read error).
        if(fgets(line, sizeof(line), stdin) == NULL)
            break;
        printf("%s", line);
    }
    close(fd);
    return 0;
}

myshell 实现重定向

因为命令是子进程执行的，真正重定向的工作一定是子进程执行的

如何重定向，是父进程要给子进程提供信息

重定向不会影响父进程,因为进程具有独立性

进行重定向时，子进程会发生写时拷贝，拷贝父进程的 PCB 和文件描述符表，再来修改自己的文件描述符表进行重定向

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <assert.h>
#include <string.h>
#include <ctype.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#define NUM 1024        // size of the command-line buffer
#define OPT_NUM 64      // maximum number of argv slots
#define NONE_REDIR 0    // no redirection
#define INPUT_REDIR 1   // input redirection  "<"
#define OUTPUT_REDIR 2  // output redirection ">"
#define APPEDN_REDIR 3  // append redirection ">>" (spelling kept: other code uses this name)
// Advance the pointer lvalue `start` past leading whitespace.
// The cast to unsigned char keeps isspace() defined for bytes >= 0x80.
#define trimSpace(start) do{ while(isspace((unsigned char)*(start)))  ++(start); }while(0)
char lineCommand[NUM];
char* myargv[OPT_NUM];
// Exit information of the most recently waited-for child
int lastCode = 0;
int lastSignal = 0;
int redirType = NONE_REDIR; // redirection type of the current command
char* redirFile = NULL;     // redirection target; points into the command buffer
// Split a command line at its first redirection operator.
//
//   "ls -a -l > myfile.txt" -> commands = "ls -a -l ", redirFile = "myfile.txt"
//
// commands: writable, NUL-terminated command line; must not be NULL. The
//           operator character is overwritten with '\0', and so is the first
//           character of any trailing whitespace after the file name.
// Results are reported through the globals redirType and redirFile. Without an
// operator they are NONE_REDIR and NULL. An operator with nothing after it
// leaves redirFile pointing at an empty string. errno is reset to 0.
void commandCheck(char* commands)
{
    // Start every command from a clean slate.
    redirType = NONE_REDIR;
    redirFile = NULL;
    errno = 0;
    assert(commands);

    char* op = strpbrk(commands, "<>");
    if(op == NULL)
    {
        return; // plain command, nothing to split
    }

    char* target = op + 1;
    if(*op == '<')
    {
        // "cat <     myfile.txt"
        redirType = INPUT_REDIR;
    }
    else if(*target == '>')
    {
        // "ls -a >> myfile.txt"
        redirType = APPEDN_REDIR;
        ++target;
    }
    else
    {
        // "ls -a > myfile.txt"
        redirType = OUTPUT_REDIR;
    }
    *op = '\0'; // cut the line: the command part ends here

    trimSpace(target);
    // Drop trailing whitespace too, so "ls > out.txt " opens "out.txt" rather
    // than a file whose name ends in a space.
    char* tail = target + strlen(target);
    while(tail > target && isspace((unsigned char)tail[-1]))
    {
        --tail;
    }
    *tail = '\0';
    redirFile = target;
}
int main()
{
    while(1)
    {
        char* user = getenv("USER");
        // 根据用户输出对应的提示信息, get_current_dir_name函数可以获得当前的工作路径
        if(strcmp(user, "root") == 0)
        {
            printf("[%s@%s %s]# ", user, getenv("HOSTNAME"), get_current_dir_name());
        }
        else
        {
            printf("[%s@%s %s]$ ", user, getenv("HOSTNAME"), get_current_dir_name());
        }
        fflush(stdout); // 刷新缓冲区
        // 获取用户输入
        char* s = fgets(lineCommand, sizeof(lineCommand) - 1, stdin);
        assert(s != NULL);
        // 清除最后一个\n, abcd\n
        lineCommand[strlen(lineCommand) - 1] = 0;
        // 字符串切割:"ls -a -l" -> "ls" "-a" "-l"
        // "ls -a -l > myfile.txt" -> "ls -a -l" "myfile.txt"
        // "cat < myfile.txt" -> "cat" "myfile.txt"
        commandCheck(lineCommand);  // 如果有重定向,则将字符串拆成两部分
        myargv[0] = strtok(lineCommand, " ");
        int i = 1;
        // 因为无法执行"ll"指令, 所以这里做一下处理
        if(myargv[0] != NULL && strcmp(myargv[0], "ll") == 0)
        {
            myargv[0] = "ls";
            myargv[i++] = "-l";
        }
        if(myargv[0] != NULL && strcmp(myargv[0], "ls") == 0)
        {
            myargv[i++] = "--color=auto";
        }
        // 如果切割完毕, strtok返回NULL, myargv[end] = NULL
        while(myargv[i++] = strtok(NULL, " "));
        // 如果是cd命令, 不需要创建子进程来执行, 让当前进程的父进程shell执行对应的命令, 本质就是调用系统接口
        // 像这种不需要创建子进程来执行, 而是让shell自己执行的命令, 称为内建命令或者内置命令
        // echo和cd就是一个内建命令
        if(myargv[0] != NULL && strcmp(myargv[0], "cd") == 0)
        {
            // 如果cd命令没有第二个参数, 则切换到家目录
            if(myargv[1] == NULL)
            {
                chdir(getenv("HOME"));  // 更改到家目录
            }
            else
            {
                if(strcmp(myargv[1], "-") == 0) // 该功能还有BUG, 因为环境变量的问题
                {
                    chdir(getenv("OLDPWD"));    // 回到上一次所处的路径
                }
                else if(strcmp(myargv[1], "~") == 0)
                {
                    chdir(getenv("HOME"));  // 去到家目录
                }
                else
                {
                    chdir(myargv[1]);   // 更改到指定目录
                }
            }
            continue;   // 不创建子进程, continue回到while循环处
        }
        // 实现echo命令, 当前的echo命令功能也不是很全
        if(myargv[0] != NULL && myargv[1] != NULL && strcmp(myargv[0], "echo") == 0)
        {
            if(strcmp(myargv[1], "$?") == 0)
            {
                printf("%d, %d\n", lastSignal, lastCode);
            }
            else
            {
                printf("%s\n", myargv[1]);
            }
            continue;
        }
        // 创建子进程来执行命令
        pid_t id = fork();
        assert(id != -1);
        // child process
        if(id == 0)
        {
            // 因为命令是子进程执行的,真正重定向的工作一定是子进程执行的
            // 如何重定向,是父进程要个子进程提供信息
            // 这里的重定向不会影响父进程,因为进程具有独立性
            switch(redirType)
            {
                case NONE_REDIR:
                    // 什么都不做
                    break;
                case INPUT_REDIR:
                    {
                        ssize_t fd = open(redirFile, O_RDONLY);
                        if(fd < 0)
                        {
                            perror("open");
                            exit(errno);
                        }
                        // 重定向的文件已经成功打开了
                        dup2(fd, 0);
                    }
                    break;
                case OUTPUT_REDIR:
                case APPEDN_REDIR:
                    {
                        int flags = O_WRONLY | O_CREAT;
                        if(redirType == APPEDN_REDIR)   flags |= O_APPEND;
                        else flags |= O_TRUNC;
                        ssize_t fd = open(redirFile, flags, 0666);
                        if(fd < 0)
                        {
                            perror("open");
                            exit(errno);
                        }
                        dup2(fd, 1);    // ls等指令执行结果是打印在显示器上的
                    }
                    break;
                default:
                    printf("error\n");
                    break;
            }
            execvp(myargv[0], myargv);  // 执行程序替换的时候,不会影响曾经进程打开的重定向的文件,因为程序替换只是替换代码和数据
            exit(errno);    // 进程替换失败
        }
        int status = 0;
        pid_t ret = waitpid(id, &status, 0);   // 阻塞等待
        assert(ret > 0);
        lastCode = ((status >> 8) & 0xFF);
        lastSignal = (status & 0x7F);
    }
    return 0;
}