Linux C语言非阻塞I/O的错误处理

非阻塞I/O概述

在Linux环境下，I/O操作通常分为阻塞和非阻塞两种模式。阻塞I/O是指当执行I/O操作时，程序会一直等待，直到操作完成。例如，当调用read函数从文件描述符读取数据时，如果没有数据可读，read函数会阻塞当前进程，直到有数据可读或者发生错误。

而非阻塞I/O则不同，当执行非阻塞I/O操作时，如果操作无法立即完成，函数会立即返回，返回值通常表示操作的状态。以read函数为例，如果没有数据可读，非阻塞模式下的read函数会返回 -1，并设置errno为EAGAIN或EWOULDBLOCK，表示操作暂时无法完成，但稍后可以重试。

非阻塞I/O在很多场景下非常有用，比如在网络编程中，我们可能需要同时处理多个连接，而不想因为等待某个连接的数据而阻塞整个程序。通过将文件描述符设置为非阻塞模式，我们可以在没有数据可读时立即返回，去处理其他连接或执行其他任务。

设置文件描述符为非阻塞模式

在Linux中，可以通过fcntl函数来设置文件描述符为非阻塞模式。fcntl函数的原型如下：

#include <fcntl.h>
int fcntl(int fd, int cmd, ... /* arg */ );

通常使用F_GETFL和F_SETFL命令来获取和设置文件描述符的标志。要将文件描述符设置为非阻塞模式，可以按如下方式操作：

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>

/*
 * Opens test.txt, switches the descriptor to non-blocking mode with
 * fcntl(F_GETFL) / fcntl(F_SETFL), then performs a single read() and
 * reports the outcome.
 *
 * Returns 0 once the read attempt has been handled; exits with
 * EXIT_FAILURE if open() or either fcntl() call fails.
 */
int main() {
    int fd = open("test.txt", O_RDONLY);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }

    // Fetch the current file status flags so the existing ones are preserved.
    int flags = fcntl(fd, F_GETFL, 0);
    if (flags == -1) {
        perror("fcntl F_GETFL");
        close(fd);
        exit(EXIT_FAILURE);
    }

    // Add O_NONBLOCK on top of whatever flags were already set.
    flags |= O_NONBLOCK;
    if (fcntl(fd, F_SETFL, flags) == -1) {
        perror("fcntl F_SETFL");
        close(fd);
        exit(EXIT_FAILURE);
    }

    // Perform the non-blocking read. One byte of the buffer is reserved so
    // that the terminating '\0' written below can never land past the end
    // of the array, even when read() fills everything it was offered.
    char buffer[1024];
    ssize_t bytes_read = read(fd, buffer, sizeof(buffer) - 1);
    if (bytes_read == -1) {
        if (errno == EAGAIN || errno == EWOULDBLOCK) {
            printf("No data available yet, can retry later.\n");
        } else {
            perror("read");
        }
    } else if (bytes_read > 0) {
        buffer[bytes_read] = '\0';
        printf("Read data: %s\n", buffer);
    }
    // bytes_read == 0 means end of file; there is nothing to print.

    close(fd);
    return 0;
}

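在上述代码中，首先打开一个文件test.txt，然后通过fcntl函数获取文件描述符的当前标志，添加O_NONBLOCK标志后再设置回去，从而将文件描述符设置为非阻塞模式。之后进行read操作，根据返回值和errno来处理不同情况。需要注意的是，test.txt是普通磁盘文件，而对普通文件的read通常不会返回EAGAIN或EWOULDBLOCK（内核认为普通文件始终处于就绪状态），因此本例主要用于演示设置O_NONBLOCK的API用法；在管道、套接字、终端等描述符上，非阻塞模式才会真正表现出“暂无数据、立即返回”的行为。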

非阻塞I/O错误处理的重要性

在非阻塞I/O中，错误处理尤为重要。由于非阻塞I/O操作可能会立即返回，并且在操作无法立即完成时返回错误码，正确处理这些错误码是保证程序正常运行的关键。如果错误处理不当，可能会导致程序无法正确处理数据，或者在错误情况下不断重试不必要的操作，浪费系统资源。

常见的非阻塞I/O错误及处理

EAGAIN/EWOULDBLOCK错误

正如前面提到的，当非阻塞I/O操作无法立即完成时，函数通常会返回 -1，并设置errno为EAGAIN或EWOULDBLOCK。这两个错误码表示相同的意思，即操作暂时无法完成，但稍后可以重试。在Linux上两者的数值相同，但POSIX允许它们取不同的值，因此可移植的代码应当同时检查这两个错误码。

在网络编程中，例如在从套接字读取数据时，如果出现EAGAIN或EWOULDBLOCK错误，说明当前没有数据可读，但并不意味着连接出现了问题。可以通过轮询或者使用多路复用技术（如select、poll、epoll）来等待数据到达，然后再重试I/O操作。

以下是一个使用轮询处理EAGAIN错误的示例代码：

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>

#define BUFFER_SIZE 1024

/*
 * Opens test.txt in non-blocking mode and polls it with read() until data
 * arrives, end of file is reached, or an unrecoverable error occurs.
 *
 * Returns 0 after printing the first chunk of data or after reaching end of
 * file; exits with EXIT_FAILURE if open() fails or read() fails with
 * anything other than EAGAIN / EWOULDBLOCK.
 */
int main() {
    int fd = open("test.txt", O_RDONLY | O_NONBLOCK);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }

    char buffer[BUFFER_SIZE];
    ssize_t bytes_read;
    while (1) {
        // Reserve one byte so the '\0' appended below always fits.
        bytes_read = read(fd, buffer, sizeof(buffer) - 1);
        if (bytes_read == -1) {
            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                printf("No data available yet, retrying...\n");
                // Sleep briefly so the loop does not spin at full speed.
                sleep(1);
            } else {
                perror("read");
                close(fd);
                exit(EXIT_FAILURE);
            }
        } else if (bytes_read > 0) {
            buffer[bytes_read] = '\0';
            printf("Read data: %s\n", buffer);
            break;
        } else {
            // read() returned 0: end of file. Retrying would never produce
            // data, so stop here instead of looping forever.
            printf("End of file reached.\n");
            break;
        }
    }

    close(fd);
    return 0;
}

在上述代码中，当read函数返回 -1 且errno为EAGAIN或EWOULDBLOCK时，程序会打印提示信息并睡眠1秒，然后再次尝试读取数据。

EBADF错误

EBADF错误表示文件描述符无效。这可能是因为文件描述符已经关闭，或者从未打开过。当在非阻塞I/O操作中遇到EBADF错误时，需要检查文件描述符的状态，确保其有效性。

以下是一个可能触发EBADF错误的示例代码及处理：

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>

/*
 * Deliberately provokes EBADF: opens test.txt, closes the descriptor, then
 * calls read() on the stale descriptor number and reports the result.
 *
 * Always returns 0 unless open() itself fails, in which case the program
 * exits with EXIT_FAILURE.
 */
int main() {
    const int fd = open("test.txt", O_RDONLY | O_NONBLOCK);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }

    close(fd);

    // Use the descriptor after it has been closed; a zero-length read is
    // enough for the kernel to validate the descriptor.
    if (read(fd, NULL, 0) != -1) {
        return 0;
    }

    if (errno != EBADF) {
        perror("read");
    } else {
        printf("File descriptor is invalid, it might be closed.\n");
    }

    return 0;
}

在上述代码中，先打开文件并获取文件描述符，然后关闭文件描述符，接着尝试使用已关闭的文件描述符进行read操作，此时会返回 -1 且errno为EBADF，程序会打印相应的错误提示。

EINTR错误

EINTR错误表示系统调用被信号中断。在Linux中，当一个进程接收到信号时，正在执行的系统调用可能会被中断，并返回 -1，同时设置errno为EINTR。对于非阻塞I/O操作，遇到EINTR错误时，通常可以选择重试该操作。

以下是一个处理EINTR错误的示例代码：

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <signal.h>

/*
 * Signal handler: writes "Received signal <signum>\n" to standard output.
 *
 * printf() is not async-signal-safe, so the message is assembled by hand in
 * a local buffer and emitted with a single write(), which is. errno is saved
 * on entry and restored on exit so that the interrupted code still sees the
 * errno value of its own system call.
 *
 * signum: number of the signal being delivered.
 */
void signal_handler(int signum) {
    static const char prefix[] = "Received signal ";
    char message[sizeof(prefix) + 16];
    char digits[12];
    size_t digit_count = 0;
    size_t length = 0;
    int saved_errno = errno;

    // Work on the magnitude as unsigned so that INT_MIN cannot overflow.
    unsigned int magnitude =
        signum < 0 ? 0u - (unsigned int)signum : (unsigned int)signum;

    // Collect decimal digits, least significant first.
    do {
        digits[digit_count++] = (char)('0' + magnitude % 10u);
        magnitude /= 10u;
    } while (magnitude != 0u);

    for (size_t i = 0; i < sizeof(prefix) - 1; i++) {
        message[length++] = prefix[i];
    }
    if (signum < 0) {
        message[length++] = '-';
    }
    while (digit_count > 0) {
        message[length++] = digits[--digit_count];
    }
    message[length++] = '\n';

    // Nothing useful can be done inside a handler if the write fails.
    ssize_t ignored = write(STDOUT_FILENO, message, length);
    (void)ignored;

    errno = saved_errno;
}

/*
 * Installs signal_handler for SIGINT, opens test.txt in non-blocking mode
 * and reads from it, retrying the read() for as long as it fails with EINTR.
 *
 * Returns 0 after the read attempt has been handled; exits with
 * EXIT_FAILURE if open() fails or read() fails with an error other than
 * EINTR, EAGAIN or EWOULDBLOCK.
 */
int main() {
    // NOTE(review): whether read() can actually fail with EINTR depends on
    // how the handler is installed (see SA_RESTART in sigaction(2)).
    signal(SIGINT, signal_handler);

    int fd = open("test.txt", O_RDONLY | O_NONBLOCK);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }

    char buffer[1024];
    ssize_t bytes_read;
    int interrupted;
    do {
        interrupted = 0;
        // Reserve one byte so the '\0' appended below always fits.
        bytes_read = read(fd, buffer, sizeof(buffer) - 1);
        if (bytes_read == -1) {
            // Capture errno immediately: the printf() calls below are
            // allowed to overwrite it, and the retry decision must be based
            // on the value read() left behind.
            int read_errno = errno;
            if (read_errno == EINTR) {
                interrupted = 1;
                printf("System call interrupted by signal, retrying...\n");
            } else if (read_errno == EAGAIN || read_errno == EWOULDBLOCK) {
                printf("No data available yet, can retry later.\n");
            } else {
                perror("read");
                close(fd);
                exit(EXIT_FAILURE);
            }
        }
    } while (interrupted);

    if (bytes_read > 0) {
        buffer[bytes_read] = '\0';
        printf("Read data: %s\n", buffer);
    }

    close(fd);
    return 0;
}

在上述代码中，注册了一个信号处理函数signal_handler来处理SIGINT信号。在read操作中，如果返回 -1 且errno为EINTR，程序会打印提示信息并重新尝试read操作。

EIO错误

EIO错误表示发生了I/O错误。这可能是由于硬件故障、文件系统损坏等原因导致的。当在非阻塞I/O操作中遇到EIO错误时，通常需要检查相关设备或文件系统的状态，并且可能需要停止当前的I/O操作。

以下是一个模拟EIO错误的示例及处理：

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>

// 模拟I/O错误的函数
/*
 * Stand-in for read() that simulates an I/O failure.
 *
 * The parameters mirror read()'s signature but are not used: every call
 * sets errno to EIO and returns -1.
 */
ssize_t mock_read(int fd, void *buf, size_t count) {
    (void)fd;
    (void)buf;
    (void)count;

    const ssize_t failure = -1;
    errno = EIO;
    return failure;
}

/*
 * Opens test.txt in non-blocking mode and "reads" from it through
 * mock_read(), then reports the resulting error.
 *
 * Returns 0 after the read attempt has been handled; exits with
 * EXIT_FAILURE if open() fails.
 */
int main() {
    char buffer[1024];

    const int fd = open("test.txt", O_RDONLY | O_NONBLOCK);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }

    if (mock_read(fd, buffer, sizeof(buffer)) == -1) {
        // Single out EIO; anything else goes through perror().
        if (errno != EIO) {
            perror("read");
        } else {
            printf("An I/O error occurred. Check device or file system.\n");
        }
    }

    close(fd);
    return 0;
}

在上述代码中，通过自定义的mock_read函数模拟返回EIO错误，程序检测到EIO错误时会打印相应的提示信息。

基于多路复用的非阻塞I/O错误处理

多路复用技术（如select、poll、epoll）可以有效地管理多个文件描述符，同时处理多个I/O操作。在使用多路复用技术时，错误处理也需要特别注意。

select

select函数用于等待文件描述符集合中的一个或多个描述符变为就绪状态。其原型如下：

#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

int select(int nfds, fd_set *readfds, fd_set *writefds,
           fd_set *exceptfds, struct timeval *timeout);

在使用select进行非阻塞I/O时，如果select返回 -1，需要检查errno来判断错误原因。常见的错误有EBADF（无效的文件描述符）、EINTR（被信号中断）等。

以下是一个使用select进行非阻塞I/O并处理错误的示例代码：

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/select.h>
#include <sys/time.h>

#define BUFFER_SIZE 1024

/*
 * Opens test.txt in non-blocking mode, waits up to five seconds with
 * select() for it to become readable, then reads once and reports the
 * outcome.
 *
 * Returns 0 after the wait/read attempt has been handled; exits with
 * EXIT_FAILURE if open() fails or the descriptor cannot be used with
 * select().
 */
int main() {
    int fd = open("test.txt", O_RDONLY | O_NONBLOCK);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }

    // FD_SET() on a descriptor that is not below FD_SETSIZE is undefined
    // behavior, so refuse it up front.
    if (fd >= FD_SETSIZE) {
        fprintf(stderr, "File descriptor %d is too large for select.\n", fd);
        close(fd);
        exit(EXIT_FAILURE);
    }

    fd_set read_fds;
    int result;
    for (;;) {
        // select() may modify both the descriptor set and the timeout, so
        // both are rebuilt before every attempt. A retry therefore waits
        // for a fresh five-second period.
        FD_ZERO(&read_fds);
        FD_SET(fd, &read_fds);

        struct timeval timeout;
        timeout.tv_sec = 5;
        timeout.tv_usec = 0;

        result = select(fd + 1, &read_fds, NULL, NULL, &timeout);
        if (result == -1 && errno == EINTR) {
            // Interrupted by a signal: actually retry the wait.
            printf("select interrupted by signal, retrying...\n");
            continue;
        }
        break;
    }

    if (result == -1) {
        if (errno == EBADF) {
            printf("Invalid file descriptor in select.\n");
        } else {
            perror("select");
        }
    } else if (result == 0) {
        printf("select timeout.\n");
    } else {
        if (FD_ISSET(fd, &read_fds)) {
            char buffer[BUFFER_SIZE];
            // Reserve one byte so the '\0' appended below always fits.
            ssize_t bytes_read = read(fd, buffer, sizeof(buffer) - 1);
            if (bytes_read == -1) {
                if (errno == EAGAIN || errno == EWOULDBLOCK) {
                    printf("No data available yet, can retry later.\n");
                } else {
                    perror("read");
                }
            } else if (bytes_read > 0) {
                buffer[bytes_read] = '\0';
                printf("Read data: %s\n", buffer);
            }
        }
    }

    close(fd);
    return 0;
}

在上述代码中，使用select等待文件描述符fd变为可读状态。如果select返回 -1，根据errno处理不同的错误情况。如果返回0，表示超时。如果返回大于0，检查文件描述符是否在可读集合中，然后进行read操作并处理可能的错误。

poll

poll函数也是用于多路复用I/O，其原型如下：

#include <poll.h>

int poll(struct pollfd *fds, nfds_t nfds, int timeout);

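与select类似，当poll返回 -1 时，需要检查errno来处理错误，常见的有EINTR（被信号中断）、EINVAL（nfds超出限制）和ENOMEM（内存不足）等。需要注意的是，poll并不会因为某个文件描述符无效而失败并返回EBADF，而是在对应pollfd结构的revents字段中设置POLLNVAL；此外，即使没有在events中请求，revents中也可能出现POLLERR和POLLHUP，同样需要检查。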

以下是一个使用poll进行非阻塞I/O并处理错误的示例代码：

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <poll.h>

#define BUFFER_SIZE 1024

/*
 * Opens test.txt in non-blocking mode, waits up to five seconds with poll()
 * for it to become readable, then reads once and reports the outcome.
 *
 * Returns 0 after the wait/read attempt has been handled; exits with
 * EXIT_FAILURE if open() fails.
 */
int main() {
    int fd = open("test.txt", O_RDONLY | O_NONBLOCK);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }

    struct pollfd poll_fd;
    poll_fd.fd = fd;
    poll_fd.events = POLLIN;
    poll_fd.revents = 0;

    int result;
    for (;;) {
        result = poll(&poll_fd, 1, 5000);
        if (result == -1 && errno == EINTR) {
            // Interrupted by a signal: actually retry the wait. The retry
            // starts a fresh five-second timeout.
            printf("poll interrupted by signal, retrying...\n");
            continue;
        }
        break;
    }

    if (result == -1) {
        perror("poll");
    } else if (result == 0) {
        printf("poll timeout.\n");
    } else {
        if (poll_fd.revents & POLLIN) {
            char buffer[BUFFER_SIZE];
            // Reserve one byte so the '\0' appended below always fits.
            ssize_t bytes_read = read(fd, buffer, sizeof(buffer) - 1);
            if (bytes_read == -1) {
                if (errno == EAGAIN || errno == EWOULDBLOCK) {
                    printf("No data available yet, can retry later.\n");
                } else {
                    perror("read");
                }
            } else if (bytes_read > 0) {
                buffer[bytes_read] = '\0';
                printf("Read data: %s\n", buffer);
            }
        } else if (poll_fd.revents & POLLNVAL) {
            // poll() reports a descriptor that is not open through POLLNVAL
            // in revents instead of failing with EBADF.
            printf("Invalid file descriptor in poll.\n");
        } else if (poll_fd.revents & (POLLERR | POLLHUP)) {
            // These conditions are reported even though they were not
            // requested in events.
            printf("poll reported an error or hang-up on the descriptor.\n");
        }
    }

    close(fd);
    return 0;
}

在上述代码中，使用poll等待文件描述符fd变为可读状态。如果poll返回 -1，根据errno处理不同的错误情况。如果返回0，表示超时。如果返回大于0，检查文件描述符是否有可读事件，然后进行read操作并处理可能的错误。

epoll

epoll是Linux特有的高效多路复用机制，相比于select和poll，它在处理大量文件描述符时性能更优。epoll相关函数主要有epoll_create（及其较新的变体epoll_create1，下面的示例使用后者）、epoll_ctl和epoll_wait。

epoll_create用于创建一个epoll实例，epoll_ctl用于控制epoll实例中注册的文件描述符，epoll_wait用于等待事件发生。

以下是一个使用epoll进行非阻塞I/O并处理错误的示例代码：

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/epoll.h>

#define BUFFER_SIZE 1024
#define EPOLL_SIZE 10

/*
 * Demonstrates epoll on a non-blocking descriptor: creates a pipe, queues a
 * short message on it, registers the read end with an epoll instance, waits
 * up to five seconds for it to become readable, then reads once and reports
 * the outcome.
 *
 * A pipe is used because epoll cannot monitor regular files: epoll_ctl()
 * fails with EPERM for them, so an example built on a plain text file would
 * never get as far as epoll_wait().
 *
 * Returns 0 after the wait/read attempt has been handled; exits with
 * EXIT_FAILURE if the pipe, the epoll instance or the registration cannot
 * be set up.
 */
int main() {
    int pipe_fds[2];
    if (pipe(pipe_fds) == -1) {
        perror("pipe");
        exit(EXIT_FAILURE);
    }
    int fd = pipe_fds[0];

    // Queue some data so the read end becomes readable, then close the
    // write end; it is not needed again.
    static const char sample[] = "Hello, epoll!";
    if (write(pipe_fds[1], sample, sizeof(sample) - 1) == -1) {
        perror("write");
        close(pipe_fds[1]);
        close(fd);
        exit(EXIT_FAILURE);
    }
    close(pipe_fds[1]);

    // Switch the read end to non-blocking mode.
    int flags = fcntl(fd, F_GETFL, 0);
    if (flags == -1 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
        perror("fcntl");
        close(fd);
        exit(EXIT_FAILURE);
    }

    int epoll_fd = epoll_create1(0);
    if (epoll_fd == -1) {
        perror("epoll_create1");
        close(fd);
        exit(EXIT_FAILURE);
    }

    struct epoll_event event = {0};
    event.data.fd = fd;
    event.events = EPOLLIN;

    if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1) {
        perror("epoll_ctl");
        close(fd);
        close(epoll_fd);
        exit(EXIT_FAILURE);
    }

    struct epoll_event events[EPOLL_SIZE];
    int num_events;
    for (;;) {
        num_events = epoll_wait(epoll_fd, events, EPOLL_SIZE, 5000);
        if (num_events == -1 && errno == EINTR) {
            // Interrupted by a signal: actually retry the wait. The retry
            // starts a fresh five-second timeout.
            printf("epoll_wait interrupted by signal, retrying...\n");
            continue;
        }
        break;
    }

    if (num_events == -1) {
        perror("epoll_wait");
    } else if (num_events == 0) {
        printf("epoll_wait timeout.\n");
    } else {
        for (int i = 0; i < num_events; ++i) {
            if (events[i].data.fd != fd) {
                continue;
            }
            if (events[i].events & EPOLLIN) {
                char buffer[BUFFER_SIZE];
                // Reserve one byte so the '\0' appended below always fits.
                ssize_t bytes_read = read(fd, buffer, sizeof(buffer) - 1);
                if (bytes_read == -1) {
                    if (errno == EAGAIN || errno == EWOULDBLOCK) {
                        printf("No data available yet, can retry later.\n");
                    } else {
                        perror("read");
                    }
                } else if (bytes_read > 0) {
                    buffer[bytes_read] = '\0';
                    printf("Read data: %s\n", buffer);
                }
            } else if (events[i].events & (EPOLLERR | EPOLLHUP)) {
                // Reported by epoll even though they were not requested.
                printf("epoll reported an error or hang-up on the descriptor.\n");
            }
        }
    }

    close(fd);
    close(epoll_fd);
    return 0;
}

在上述代码中，首先创建一个epoll实例，然后将文件描述符fd添加到epoll实例中，并等待事件发生。如果epoll_wait返回 -1，根据errno处理不同的错误情况。如果返回0，表示超时。如果返回大于0，检查文件描述符是否有可读事件，然后进行read操作并处理可能的错误。

非阻塞I/O错误处理中的资源管理

在处理非阻塞I/O错误时，资源管理是一个重要的方面。例如，在打开文件描述符后，如果在后续的I/O操作中发生错误，需要确保正确关闭文件描述符，以避免资源泄漏。

当使用多路复用技术时，也要注意在错误发生时正确清理相关的资源。比如在使用epoll时，如果在epoll_ctl或epoll_wait过程中发生错误，需要确保关闭epoll实例和相关的文件描述符。

以下是一个综合考虑资源管理的示例代码：

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/epoll.h>

#define BUFFER_SIZE 1024
#define EPOLL_SIZE 10

/*
 * Releases the descriptors used by the example.
 *
 * fd:       descriptor being monitored, or -1 if it was never obtained.
 * epoll_fd: epoll instance descriptor, or -1 if it was never created.
 *
 * Each argument that is not -1 is closed, fd first and epoll_fd second.
 */
void clean_up(int fd, int epoll_fd) {
    const int handles[] = { fd, epoll_fd };

    for (size_t i = 0; i < sizeof(handles) / sizeof(handles[0]); i++) {
        if (handles[i] == -1) {
            continue;
        }
        close(handles[i]);
    }
}

/*
 * epoll example with centralized cleanup: creates a pipe, queues a short
 * message on it, registers the read end with an epoll instance, waits up to
 * five seconds for it to become readable, then reads once. Every exit path
 * releases the descriptors through clean_up().
 *
 * A pipe is used because epoll cannot monitor regular files: epoll_ctl()
 * fails with EPERM for them, so an example built on a plain text file would
 * never get as far as epoll_wait().
 *
 * Returns 0 after a successful wait and read attempt; exits with
 * EXIT_FAILURE on any setup failure, on an epoll_wait() error other than
 * EINTR, or on timeout.
 */
int main() {
    int fd = -1;
    int epoll_fd = -1;

    int pipe_fds[2];
    if (pipe(pipe_fds) == -1) {
        perror("pipe");
        clean_up(fd, epoll_fd);
        exit(EXIT_FAILURE);
    }
    fd = pipe_fds[0];

    // Queue some data so the read end becomes readable, then close the
    // write end right away so that only fd and epoll_fd remain to be
    // tracked by clean_up().
    static const char sample[] = "Hello, epoll!";
    if (write(pipe_fds[1], sample, sizeof(sample) - 1) == -1) {
        perror("write");
        close(pipe_fds[1]);
        clean_up(fd, epoll_fd);
        exit(EXIT_FAILURE);
    }
    close(pipe_fds[1]);

    // Switch the read end to non-blocking mode.
    int flags = fcntl(fd, F_GETFL, 0);
    if (flags == -1 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
        perror("fcntl");
        clean_up(fd, epoll_fd);
        exit(EXIT_FAILURE);
    }

    epoll_fd = epoll_create1(0);
    if (epoll_fd == -1) {
        perror("epoll_create1");
        clean_up(fd, epoll_fd);
        exit(EXIT_FAILURE);
    }

    struct epoll_event event = {0};
    event.data.fd = fd;
    event.events = EPOLLIN;

    if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1) {
        perror("epoll_ctl");
        clean_up(fd, epoll_fd);
        exit(EXIT_FAILURE);
    }

    struct epoll_event events[EPOLL_SIZE];
    int num_events;
    for (;;) {
        num_events = epoll_wait(epoll_fd, events, EPOLL_SIZE, 5000);
        if (num_events == -1 && errno == EINTR) {
            // Interrupted by a signal: actually retry the wait instead of
            // giving up. The retry starts a fresh five-second timeout.
            printf("epoll_wait interrupted by signal, retrying...\n");
            continue;
        }
        break;
    }

    if (num_events == -1) {
        perror("epoll_wait");
        clean_up(fd, epoll_fd);
        exit(EXIT_FAILURE);
    } else if (num_events == 0) {
        printf("epoll_wait timeout.\n");
        clean_up(fd, epoll_fd);
        exit(EXIT_FAILURE);
    } else {
        for (int i = 0; i < num_events; ++i) {
            if (events[i].data.fd == fd && (events[i].events & EPOLLIN)) {
                char buffer[BUFFER_SIZE];
                // Reserve one byte so the '\0' appended below always fits.
                ssize_t bytes_read = read(fd, buffer, sizeof(buffer) - 1);
                if (bytes_read == -1) {
                    if (errno == EAGAIN || errno == EWOULDBLOCK) {
                        printf("No data available yet, can retry later.\n");
                    } else {
                        perror("read");
                    }
                } else if (bytes_read > 0) {
                    buffer[bytes_read] = '\0';
                    printf("Read data: %s\n", buffer);
                }
            }
        }
    }

    clean_up(fd, epoll_fd);
    return 0;
}

在上述代码中，定义了一个clean_up函数来统一处理文件描述符和epoll实例的关闭操作。在程序的各个可能发生错误的地方，都会调用clean_up函数来确保资源的正确释放。

总结常见错误处理策略

对于EAGAIN/EWOULDBLOCK错误：通常表示操作暂时无法完成，可以选择轮询或者使用多路复用技术等待数据到达后重试操作。
对于EBADF错误：检查文件描述符的有效性，确认它已成功打开且尚未被关闭。
对于EINTR错误：通常可以选择重试被中断的系统调用。
对于EIO错误：检查相关设备或文件系统的状态，可能需要停止当前I/O操作。

在使用多路复用技术时，同样要根据select、poll、epoll等函数返回的错误码（如EINTR、EBADF等）进行相应的处理。同时，在整个非阻塞I/O过程中，要注意资源管理，确保在错误发生时正确释放相关资源，避免资源泄漏。通过正确处理这些错误，能够使基于Linux C语言的非阻塞I/O程序更加健壮和可靠。