Linux C语言等待子进程的批量操作

Linux C 语言等待子进程的批量操作

进程相关基础概念回顾

在深入探讨等待子进程的批量操作之前，我们先来回顾一下 Linux 进程相关的一些基础概念。

进程的创建

在 Linux 系统中，使用 fork() 函数来创建新的进程。fork() 函数会创建一个与调用进程几乎完全相同的新进程，这个新进程称为子进程，而调用 fork() 的进程称为父进程。fork() 函数的返回值比较特殊，在父进程中返回子进程的进程 ID（PID），在子进程中返回 0。如果 fork() 失败，会返回 -1 并设置相应的错误号。下面是一个简单的 fork() 示例代码：

#include <stdio.h>
#include <unistd.h>

/*
 * Forks once and lets each of the two resulting processes report its role.
 * Returns 1 when fork() fails, otherwise 0 (in both the parent and the child).
 */
int main() {
    pid_t child = fork();

    switch (child) {
    case -1:
        perror("fork error");
        return 1;
    case 0:
        /* fork() returned 0, so this is the child. */
        printf("I am child process. My PID is %d\n", getpid());
        break;
    default:
        /* fork() returned the new child's PID, so this is the parent. */
        printf("I am parent process. My PID is %d, child's PID is %d\n", getpid(), child);
        break;
    }

    return 0;
}

在这个例子中，父进程创建了子进程后，父进程和子进程会分别执行不同分支的代码，从而展现出不同的输出。

进程的终止

进程可以通过多种方式终止，常见的有正常终止和异常终止。正常终止包括调用 exit() 函数，_exit() 函数或者从 main() 函数返回。exit() 函数会执行一些清理工作，如刷新标准 I/O 缓冲区等，然后调用 _exit() 函数。_exit() 函数则直接终止进程，不进行额外的清理。异常终止则是由于进程收到某些信号导致，比如 SIGSEGV（段错误）、SIGABRT（程序异常终止信号）等。

等待子进程的基本操作

在 Linux C 语言编程中，父进程通常需要等待子进程完成其任务后再继续执行，这就用到了等待子进程的操作。

wait() 函数

wait() 函数用于使父进程暂停执行，直到它的一个子进程终止。wait() 函数的原型如下：

#include <sys/types.h>
#include <sys/wait.h>

/* Suspends the caller until one of its children terminates; returns that
 * child's PID and stores its exit status information in *status. */
pid_t wait(int *status);

wait() 函数成功时返回已终止子进程的 PID；失败时（例如调用进程没有任何子进程）返回 -1 并设置 errno。如果 wait() 调用成功，status 参数所指向的整数将包含子进程的退出状态信息。我们可以通过一些宏来解析 status，比如 WIFEXITED(status) 用于判断子进程是否正常终止，如果是则返回非零值，此时可以通过 WEXITSTATUS(status) 获取子进程的退出状态（即传递给 exit() 的参数的低 8 位）。下面是一个使用 wait() 函数的示例：

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Forks one child that sleeps for 2 seconds and exits with status 10, then
 * waits for it in the parent and prints the exit status.
 * Returns 1 if fork() or wait() fails, 0 otherwise.
 */
int main() {
    pid_t pid;
    int status;
    pid = fork();
    if (pid == -1) {
        perror("fork error");
        return 1;
    } else if (pid == 0) {
        printf("Child process is running. PID: %d\n", getpid());
        sleep(2);
        exit(10);
    } else {
        printf("Parent process is waiting for child. PID: %d\n", getpid());
        pid_t child_pid = wait(&status);
        if (child_pid == -1) {
            /* status is not written when wait() fails, so it must not be inspected. */
            perror("wait error");
            return 1;
        }
        if (WIFEXITED(status)) {
            printf("Child process %d exited with status %d\n", child_pid, WEXITSTATUS(status));
        }
    }
    return 0;
}

在这个例子中，子进程睡眠 2 秒后以状态值 10 退出。父进程通过 wait() 函数等待子进程结束，并获取其退出状态进行打印。

waitpid() 函数

waitpid() 函数提供了比 wait() 更灵活的等待子进程的方式。其原型如下：

#include <sys/types.h>
#include <sys/wait.h>

/* Waits for the child selected by pid (-1 means any child, a positive value
 * means that specific PID); options may include flags such as WNOHANG, which
 * makes the call return immediately instead of blocking. */
pid_t waitpid(pid_t pid, int *status, int options);

pid 参数指定要等待的子进程。如果 pid 为 -1，则等待任何一个子进程，这与 wait() 函数类似；如果 pid 大于 0，则等待指定 PID 的子进程；如果 pid 为 0，则等待与调用进程属于同一进程组的任意子进程；如果 pid 小于 -1，则等待进程组 ID 等于 pid 绝对值的任意子进程。options 参数可以设置一些选项，比如 WNOHANG 表示如果没有子进程退出则立即返回 0，而不是阻塞等待。下面是一个使用 waitpid() 函数并设置 WNOHANG 选项的示例：

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Forks one child (sleeps 2 seconds, exits with status 10) and polls for its
 * termination from the parent with waitpid(..., WNOHANG), sleeping 1 second
 * between polls. Returns 1 only when fork() fails.
 */
int main() {
    int status;
    pid_t pid = fork();

    if (pid == -1) {
        perror("fork error");
        return 1;
    }

    if (pid == 0) {
        printf("Child process is running. PID: %d\n", getpid());
        sleep(2);
        exit(10);
    }

    printf("Parent process is trying to wait child. PID: %d\n", getpid());
    for (;;) {
        pid_t reaped = waitpid(pid, &status, WNOHANG);

        if (reaped == 0) {
            /* WNOHANG: the child has not changed state yet, so poll again later. */
            printf("Child is still running. Parent will check again later.\n");
            sleep(1);
            continue;
        }

        if (reaped == -1) {
            perror("waitpid error");
        } else if (WIFEXITED(status)) {
            printf("Child process %d exited with status %d\n", reaped, WEXITSTATUS(status));
        }
        break;
    }

    return 0;
}

在这个示例中，父进程通过 waitpid() 函数设置 WNOHANG 选项，不会一直阻塞等待子进程结束，而是每隔 1 秒检查一次子进程是否结束。

批量操作子进程及等待

在实际应用中，我们常常需要创建多个子进程并等待它们全部完成。

创建多个子进程

要创建多个子进程，我们可以在一个循环中调用 fork() 函数。例如，下面的代码创建 5 个子进程：

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Spawns 5 children in a loop. Child number i announces itself, sleeps
 * i + 1 seconds and exits with status i. The parent does not wait here.
 * Returns 1 if a fork() fails, 0 otherwise.
 */
int main() {
    int index = 0;

    while (index < 5) {
        pid_t pid = fork();

        if (pid == -1) {
            perror("fork error");
            return 1;
        }

        if (pid == 0) {
            printf("I am child %d. My PID is %d\n", index, getpid());
            /* The child's own task, simulated here by sleeping. */
            sleep(index + 1);
            exit(index);
        }

        index++;
    }

    /* The parent would carry on with its other work here. */
    return 0;
}

在这个例子中，每个子进程睡眠不同的时间后以不同的状态退出。

等待多个子进程

要等待这多个子进程全部完成，我们可以在父进程中使用 wait() 或 waitpid() 函数多次调用。对于 wait() 函数，由于它会等待任意一个子进程结束，所以我们可以通过一个循环来等待所有子进程。示例代码如下：

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Spawns 5 children (child n sleeps n + 1 seconds and exits with status n),
 * then reaps them with wait() until wait() reports -1, printing the exit
 * status of every child that terminated normally.
 * Returns 1 if a fork() fails, 0 otherwise.
 */
int main() {
    for (int n = 0; n < 5; n++) {
        pid_t pid = fork();

        if (pid == -1) {
            perror("fork error");
            return 1;
        }
        if (pid == 0) {
            printf("I am child %d. My PID is %d\n", n, getpid());
            sleep(n + 1);
            exit(n);
        }
    }

    for (;;) {
        int status;
        pid_t finished = wait(&status);

        if (finished == -1) {
            /* wait() returned -1: stop reaping. */
            break;
        }
        if (!WIFEXITED(status)) {
            continue;
        }
        printf("Child process %d exited with status %d\n", finished, WEXITSTATUS(status));
    }

    return 0;
}

在这个代码中，父进程通过一个 while 循环不断调用 wait() 函数，直到所有子进程都结束。每次 wait() 函数返回一个已结束子进程的 PID，父进程获取其退出状态并打印。

如果使用 waitpid() 函数等待多个子进程，可以通过记录每个子进程的 PID，然后在循环中使用 waitpid() 针对每个 PID 进行等待。示例如下：

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Spawns 5 children, remembering each PID, then waits for every recorded PID
 * in creation order with waitpid() and prints its exit status.
 * Returns 1 if a fork() fails, 0 otherwise.
 */
int main() {
    int i;
    pid_t pids[5];
    for (i = 0; i < 5; i++) {
        pids[i] = fork();
        if (pids[i] == -1) {
            perror("fork error");
            return 1;
        } else if (pids[i] == 0) {
            printf("I am child %d. My PID is %d\n", i, getpid());
            sleep(i + 1);
            exit(i);
        }
    }
    for (i = 0; i < 5; i++) {
        int status;
        pid_t child_pid = waitpid(pids[i], &status, 0);
        if (child_pid == -1) {
            /* status is not written on failure; report it and move on to the next child. */
            perror("waitpid error");
            continue;
        }
        if (WIFEXITED(status)) {
            printf("Child process %d exited with status %d\n", child_pid, WEXITSTATUS(status));
        }
    }
    return 0;
}

在这个代码中，父进程先记录每个子进程的 PID 到数组 pids 中，然后通过 waitpid() 针对每个 PID 等待子进程结束并获取其退出状态。

处理子进程的异常情况

在等待子进程批量操作的过程中，可能会遇到一些异常情况，需要我们妥善处理。

子进程异常终止

如果子进程因为收到信号而异常终止，wait() 或 waitpid() 函数返回的 status 也会反映这种情况。我们可以使用 WIFSIGNALED(status) 宏来判断子进程是否是因为收到信号而终止，如果是，则可以通过 WTERMSIG(status) 获取导致子进程终止的信号编号。下面是一个示例，模拟子进程因段错误（SIGSEGV）异常终止：

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Forks a child that deliberately writes through a null pointer, then reports
 * in the parent which signal terminated the child.
 * Returns 1 if fork() or wait() fails, 0 otherwise.
 */
int main() {
    pid_t pid = fork();
    if (pid == -1) {
        perror("fork error");
        return 1;
    } else if (pid == 0) {
        /*
         * Simulate a segmentation fault. The pointer is volatile so the
         * compiler cannot drop the store: writing through a plain null
         * pointer is undefined behavior that optimizers may remove.
         */
        volatile int *ptr = NULL;
        *ptr = 10;
        exit(0);
    } else {
        int status;
        pid_t child_pid = wait(&status);
        if (child_pid == -1) {
            /* status is not written when wait() fails. */
            perror("wait error");
            return 1;
        }
        if (WIFSIGNALED(status)) {
            printf("Child process %d terminated by signal %d\n", child_pid, WTERMSIG(status));
        }
    }
    return 0;
}

在这个示例中，子进程尝试访问空指针，导致段错误。父进程通过 wait() 函数获取子进程的状态，并判断出子进程是因信号终止，进而打印出导致终止的信号编号。

处理僵尸进程

当子进程终止而父进程没有调用 wait() 或 waitpid() 来获取其状态时，子进程会变成僵尸进程。僵尸进程会占用系统资源，如果大量产生可能会导致系统资源耗尽。为了避免僵尸进程的产生，父进程应该及时等待子进程结束。另外，也可以通过设置信号处理函数来处理 SIGCHLD 信号，在信号处理函数中调用 wait() 或 waitpid() 来清理僵尸进程。下面是一个示例：

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/* Copies the string s into buf at *pos, never writing past cap bytes. */
static void sigchld_append_str(char *buf, size_t cap, size_t *pos, const char *s) {
    while (*s != '\0' && *pos < cap) {
        buf[(*pos)++] = *s++;
    }
}

/* Appends the decimal text of value to buf at *pos, never writing past cap bytes. */
static void sigchld_append_int(char *buf, size_t cap, size_t *pos, long value) {
    char digits[24];
    size_t count = 0;
    /* Work on the magnitude as unsigned so the most negative value is safe. */
    unsigned long magnitude = value < 0 ? 0UL - (unsigned long)value : (unsigned long)value;

    do {
        digits[count++] = (char)('0' + magnitude % 10);
        magnitude /= 10;
    } while (magnitude != 0);

    if (value < 0 && *pos < cap) {
        buf[(*pos)++] = '-';
    }
    while (count > 0 && *pos < cap) {
        buf[(*pos)++] = digits[--count];
    }
}

/*
 * SIGCHLD handler: reaps every child that has already terminated, without
 * blocking, and reports the exit status of those that exited normally.
 *
 * signum is the delivered signal number; it is not used.
 *
 * Only async-signal-safe calls (waitpid, write) are made, and errno is
 * restored on return so the interrupted code never sees a value changed
 * by this handler.
 */
void sigchld_handler(int signum) {
    int saved_errno = errno;
    pid_t pid;
    int status;

    (void)signum;

    /* Several children may have exited before one delivery of the signal, so loop. */
    while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
        if (WIFEXITED(status)) {
            char msg[96];
            size_t len = 0;

            sigchld_append_str(msg, sizeof(msg), &len, "Child process ");
            sigchld_append_int(msg, sizeof(msg), &len, (long)pid);
            sigchld_append_str(msg, sizeof(msg), &len, " exited with status ");
            sigchld_append_int(msg, sizeof(msg), &len, (long)WEXITSTATUS(status));
            sigchld_append_str(msg, sizeof(msg), &len, "\n");

            if (write(STDOUT_FILENO, msg, len) == -1) {
                /* Nothing async-signal-safe can be done about a failed write. */
            }
        }
    }

    errno = saved_errno;
}

int main() {
    struct sigaction sa;
    sa.sa_handler = sigchld_handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_RESTART;
    if (sigaction(SIGCHLD, &sa, NULL) == -1) {
        perror("sigaction error");
        return 1;
    }
    int i;
    for (i = 0; i < 5; i++) {
        pid_t pid = fork();
        if (pid == -1) {
            perror("fork error");
            return 1;
        } else if (pid == 0) {
            printf("I am child %d. My PID is %d\n", i, getpid());
            sleep(i + 1);
            exit(i);
        }
    }
    // 父进程继续执行其他任务，这里简单模拟为睡眠
    sleep(10);
    return 0;
}

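在这个示例中，我们定义了一个 SIGCHLD 信号的处理函数 sigchld_handler。在处理函数中，通过 waitpid() 函数以 WNOHANG 选项不断检查是否有子进程结束，并处理它们的退出状态。父进程在创建多个子进程后，通过睡眠模拟执行其他任务，在此期间子进程结束时会触发 SIGCHLD 信号，由处理函数回收子进程，从而避免僵尸进程的产生。需要注意两点：其一，sleep() 被信号处理函数中断后会提前返回尚未睡满的秒数（SA_RESTART 对它不起作用），父进程若要睡满指定时长，必须根据返回值循环调用 sleep()，否则可能在其余子进程结束之前就退出；其二，信号处理函数中只应调用异步信号安全的函数（如 waitpid()、write()），printf() 并不在此列，并且应在处理函数返回前恢复 errno。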

实际应用场景

在很多实际应用中，批量创建子进程并等待它们完成是常见的需求。

并行任务处理

例如，在一个文件处理程序中，需要对多个文件进行并行处理。我们可以为每个文件创建一个子进程，让子进程分别处理文件，父进程等待所有子进程完成后再进行汇总等后续操作。以下是一个简单的模拟示例，假设我们要对 3 个文件进行简单的内容读取和行数统计：

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>

#define FILE_COUNT 3

/*
 * Counts the newline characters in the file named by filename, prints the
 * result and terminates the calling process.
 *
 * filename: path of the file to read.
 *
 * This function never returns: it calls exit(0) on success and exit(1) when
 * the file cannot be opened or read.
 */
void process_file(const char *filename) {
    int fd = open(filename, O_RDONLY);
    if (fd == -1) {
        perror("open file error");
        exit(1);
    }
    char buffer[1024];
    int line_count = 0;
    ssize_t bytes_read;
    while ((bytes_read = read(fd, buffer, sizeof(buffer))) > 0) {
        /*
         * read() does not NUL-terminate the buffer, so scan exactly the
         * bytes it returned rather than relying on strlen().
         */
        for (ssize_t i = 0; i < bytes_read; i++) {
            if (buffer[i] == '\n') {
                line_count++;
            }
        }
    }
    if (bytes_read == -1) {
        perror("read file error");
        close(fd);
        exit(1);
    }
    printf("File %s has %d lines.\n", filename, line_count);
    close(fd);
    exit(0);
}

/*
 * Forks one child per file to run process_file() on it, then waits for every
 * child and checks how it ended.
 * Returns 0 when all children exited with status 0; returns 1 if a fork()
 * fails or if any child could not be waited for or did not succeed.
 */
int main() {
    const char *filenames[FILE_COUNT] = {"file1.txt", "file2.txt", "file3.txt"};
    pid_t pids[FILE_COUNT];
    for (int i = 0; i < FILE_COUNT; i++) {
        pids[i] = fork();
        if (pids[i] == -1) {
            perror("fork error");
            return 1;
        } else if (pids[i] == 0) {
            /* process_file() terminates the child itself via exit(). */
            process_file(filenames[i]);
        }
    }
    int failures = 0;
    for (int i = 0; i < FILE_COUNT; i++) {
        int status;
        if (waitpid(pids[i], &status, 0) == -1) {
            perror("waitpid error");
            failures++;
        } else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
            /* The child was killed by a signal or reported an error. */
            fprintf(stderr, "Processing of %s failed.\n", filenames[i]);
            failures++;
        }
    }
    if (failures > 0) {
        fprintf(stderr, "%d of %d files could not be processed.\n", failures, FILE_COUNT);
        return 1;
    }
    printf("All files processed.\n");
    return 0;
}

在这个示例中，每个子进程负责处理一个文件，统计文件的行数并打印。父进程等待所有子进程完成后输出所有文件已处理完成的信息。

分布式计算

在分布式计算场景中，主节点可以创建多个子进程来模拟不同的计算节点，将计算任务分发给这些子进程，然后等待子进程返回计算结果进行汇总。例如，计算 1 到 100 的整数之和，可以将任务划分为多个子任务，让不同子进程分别计算部分区间的和，最后汇总结果。以下是一个简单示例：

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#define CHILD_COUNT 5

/*
 * Splits the sum 1..100 across CHILD_COUNT child processes. Each child adds
 * up its own sub-range and sends the partial sum back through a pipe; the
 * parent reads the partial sums, reaps the children and prints the total.
 *
 * The partial sums are not passed through exit(): an exit status keeps only
 * its low 8 bits (0..255), which would corrupt any partial sum above 255.
 *
 * Returns 0 on success, 1 if pipe()/fork() fails or a child delivers no result.
 */
int main() {
    pid_t pids[CHILD_COUNT];
    int result_fds[CHILD_COUNT];
    int start = 1;
    int step = 100 / CHILD_COUNT;
    for (int i = 0; i < CHILD_COUNT; i++) {
        int fds[2];
        if (pipe(fds) == -1) {
            perror("pipe error");
            return 1;
        }
        pids[i] = fork();
        if (pids[i] == -1) {
            perror("fork error");
            return 1;
        } else if (pids[i] == 0) {
            /* The child only writes; drop its read end and those inherited from earlier siblings. */
            close(fds[0]);
            for (int k = 0; k < i; k++) {
                close(result_fds[k]);
            }
            int end = start + step - 1;
            if (i == CHILD_COUNT - 1) {
                /* The last child takes whatever remains up to 100. */
                end = 100;
            }
            int sum = 0;
            for (int j = start; j <= end; j++) {
                sum += j;
            }
            printf("Child %d calculated sum from %d to %d: %d\n", i, start, end, sum);
            if (write(fds[1], &sum, sizeof(sum)) != (ssize_t)sizeof(sum)) {
                perror("write error");
                exit(1);
            }
            close(fds[1]);
            exit(0);
        }
        /* The parent only reads; closing the write end lets read() see EOF if the child dies. */
        close(fds[1]);
        result_fds[i] = fds[0];
        start += step;
    }
    int total_sum = 0;
    int failed = 0;
    for (int i = 0; i < CHILD_COUNT; i++) {
        int sum = 0;
        int status;
        ssize_t got = read(result_fds[i], &sum, sizeof(sum));
        close(result_fds[i]);
        if (waitpid(pids[i], &status, 0) == -1) {
            perror("waitpid error");
            failed = 1;
            continue;
        }
        if (got != (ssize_t)sizeof(sum) || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
            fprintf(stderr, "Child %d did not return a result.\n", i);
            failed = 1;
            continue;
        }
        total_sum += sum;
    }
    if (failed) {
        return 1;
    }
    printf("Total sum from 1 to 100: %d\n", total_sum);
    return 0;
}

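在这个示例中，父进程创建 5 个子进程，每个子进程计算一段区间内整数的和，然后父进程等待所有子进程结束并汇总它们的计算结果，得到 1 到 100 的整数之和。需要注意：WEXITSTATUS 只能取得退出状态的低 8 位（0～255），因此当部分和可能超过 255 时，不能直接用 exit() 的参数传递计算结果，而应通过管道等进程间通信方式把结果返回给父进程，退出状态只用来表示成功或失败。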

通过以上对 Linux C 语言中等待子进程批量操作的详细介绍，包括基础概念、基本操作、异常处理以及实际应用场景等方面，希望能帮助读者深入理解并熟练运用这一重要的编程技巧，在 Linux 环境下开发出高效稳定的应用程序。