From 54e03225b03641960a24c017f1820ffbd7f245a8 Mon Sep 17 00:00:00 2001 From: Peter Portante Date: Mon, 29 Sep 2025 19:56:19 -0400 Subject: [PATCH 1/4] Remove need for extra bytes for remote consoles We use `writev()` for remote consoles to remove the need for extra bytes surrounding the data to be written. We really don't want to "leak" the protocol requirement for remote consoles to the rest of the code base. To hide that, and avoid two `write_all()` method calls in a row, we promote `writev_buffer_flush()` to a `utils` method called `writev_all()` to leverage I/O vectors in one call (memory copies will need to occur for remote sockets, so using `writev()` avoids that). Further, we use the `g_ptr_array_foreach()` method instead of doing it ourselves to avoid breaking the `GPtrArray` encapsulation. Signed-off-by: Peter Portante --- src/conn_sock.c | 39 +++++++++++++++------- src/conn_sock.h | 3 +- src/ctr_logging.c | 82 +++++++---------------------------------------- src/ctr_stdio.c | 13 ++------ src/utils.c | 56 ++++++++++++++++++++++++++++++-- src/utils.h | 8 ++++- 6 files changed, 104 insertions(+), 97 deletions(-) diff --git a/src/conn_sock.c b/src/conn_sock.c index c072ea71..bdcc8577 100644 --- a/src/conn_sock.c +++ b/src/conn_sock.c @@ -39,7 +39,7 @@ static char *setup_socket(int *fd, const char *path); setup_attach_socket() is responsible for setting the correct remote FD and pushing it onto the queue. */ static struct local_sock_s local_mainfd_stdin = {&mainfd_stdin, true, NULL, "container stdin", NULL}; -struct remote_sock_s remote_attach_sock = { +static struct remote_sock_s remote_attach_sock = { SOCK_TYPE_CONSOLE, /* sock_type */ -1, /* fd */ &local_mainfd_stdin, /* dest */ @@ -349,27 +349,42 @@ char *socket_parent_dir(gboolean use_full_attach_path, size_t desired_len) return base_path; } - void schedule_main_stdin_write() { schedule_local_sock_write(&local_mainfd_stdin); } -void write_back_to_remote_consoles(char *buf, int len) +struct write_console_sock_user_data { + stdpipe_t pipe; + char *buf; + size_t buflen; +}; + +static void write_console_sock(gpointer data, G_GNUC_UNUSED gpointer user_data) { - if (local_mainfd_stdin.readers == NULL) + struct remote_sock_s *remote_sock = (struct remote_sock_s *)data; + if (!remote_sock->writable) return; - - for (int i = local_mainfd_stdin.readers->len; i > 0; i--) { - struct remote_sock_s *remote_sock = g_ptr_array_index(local_mainfd_stdin.readers, i - 1); - - if (remote_sock->writable && write_all(remote_sock->fd, buf, len) < 0) { - nwarn("Failed to write to remote console socket"); - remote_sock_shutdown(remote_sock, SHUT_WR); - } + struct write_console_sock_user_data *wcsud = (struct write_console_sock_user_data *)user_data; + struct iovec iov[2] = {{&wcsud->pipe, 1}, {wcsud->buf, wcsud->buflen}}; + writev_iov_t wviov = {2, 2, iov}; + if (writev_all(remote_sock->fd, &wviov) < 0) { + nwarn("Failed to write to remote console socket"); + remote_sock_shutdown(remote_sock, SHUT_WR); } } +void write_back_to_remote_consoles(stdpipe_t pipe, char *buf, size_t buflen) +{ + if (local_mainfd_stdin.readers == NULL) + return; + GPtrArray *readers_copy = g_ptr_array_copy(local_mainfd_stdin.readers, NULL, NULL); + g_ptr_array_set_free_func(readers_copy, NULL); + struct write_console_sock_user_data wcsud = {pipe, buf, buflen}; + g_ptr_array_foreach(readers_copy, write_console_sock, &wcsud); + g_ptr_array_free(readers_copy, true); +} + /* Internal */ static gboolean attach_cb(int fd, G_GNUC_UNUSED GIOCondition condition, gpointer user_data) { diff --git a/src/conn_sock.h b/src/conn_sock.h index 3571cc43..b322f8eb 100644 --- a/src/conn_sock.h +++ b/src/conn_sock.h @@ -3,6 +3,7 @@ #include /* gboolean */ #include "config.h" /* CONN_SOCK_BUF_SIZE */ +#include "utils.h" /* stdpipe_t */ #define SOCK_TYPE_CONSOLE 1 #define SOCK_TYPE_NOTIFY 2 @@ -52,7 +53,7 @@ char *setup_seccomp_socket(const char *socket); char *setup_attach_socket(void); void setup_notify_socket(char *); void schedule_main_stdin_write(); -void write_back_to_remote_consoles(char *buf, int len); +void write_back_to_remote_consoles(stdpipe_t pipe, char *buf, size_t buflen); void close_all_readers(); #endif // CONN_SOCK_H diff --git a/src/ctr_logging.c b/src/ctr_logging.c index 797b469f..730e7691 100644 --- a/src/ctr_logging.c +++ b/src/ctr_logging.c @@ -79,19 +79,13 @@ static size_t syslog_identifier_len; #define WRITEV_BUFFER_N_IOV 128 -typedef struct { - int iovcnt; - struct iovec iov[WRITEV_BUFFER_N_IOV]; -} writev_buffer_t; - static void parse_log_path(char *log_config); static const char *stdpipe_name(stdpipe_t pipe); static int write_journald(int pipe, char *buf, ssize_t num_read); static int write_k8s_log(stdpipe_t pipe, const char *buf, ssize_t buflen); static bool get_line_len(ptrdiff_t *line_len, const char *buf, ssize_t buflen); -static ssize_t writev_buffer_append_segment(int fd, writev_buffer_t *buf, const void *data, ssize_t len); -static ssize_t writev_buffer_append_segment_no_flush(writev_buffer_t *buf, const void *data, ssize_t len); -static ssize_t writev_buffer_flush(int fd, writev_buffer_t *buf); +static ssize_t writev_buffer_append_segment(int fd, writev_iov_t *buf, const void *data, ssize_t len); +static ssize_t writev_buffer_append_segment_no_flush(writev_iov_t *buf, const void *data, ssize_t len); static void set_k8s_timestamp(char *buf, ssize_t buflen, const char *pipename); static void reopen_k8s_file(void); static int parse_priority_prefix(const char *buf, ssize_t buflen, int *priority, const char **message_start); @@ -386,7 +380,8 @@ static int write_journald(int pipe, char *buf, ssize_t buflen) ptrdiff_t line_len = 0; while (buflen > 0 || *partial_buf_len > 0) { - writev_buffer_t bufv = {0}; + struct iovec vecs[WRITEV_BUFFER_N_IOV]; + writev_iov_t bufv = {0, WRITEV_BUFFER_N_IOV, vecs}; bool partial = buflen == 0 || get_line_len(&line_len, buf, buflen); @@ -489,7 +484,8 @@ static int write_k8s_log(stdpipe_t pipe, const char *buf, ssize_t buflen) static bool stdout_has_partial = false; static bool stderr_has_partial = false; - writev_buffer_t bufv = {0}; + struct iovec vecs[WRITEV_BUFFER_N_IOV]; + writev_iov_t bufv = {0, WRITEV_BUFFER_N_IOV, vecs}; int64_t bytes_to_be_written = 0; bool *has_partial = (pipe == STDOUT_PIPE) ? &stdout_has_partial : &stderr_has_partial; @@ -550,7 +546,7 @@ static int write_k8s_log(stdpipe_t pipe, const char *buf, ssize_t buflen) * a timestamp. */ if ((log_size_max > 0) && (k8s_bytes_written + bytes_to_be_written) > log_size_max) { - if (writev_buffer_flush(k8s_log_fd, &bufv) < 0) { + if (writev_all(k8s_log_fd, &bufv) < 0) { nwarn("failed to flush buffer to log"); } reopen_k8s_file(); @@ -600,7 +596,7 @@ static int write_k8s_log(stdpipe_t pipe, const char *buf, ssize_t buflen) buflen -= line_len; } - if (writev_buffer_flush(k8s_log_fd, &bufv) < 0) { + if (writev_all(k8s_log_fd, &bufv) < 0) { nwarn("failed to flush buffer to log"); } @@ -622,66 +618,12 @@ static bool get_line_len(ptrdiff_t *line_len, const char *buf, ssize_t buflen) return partial; } - -static ssize_t writev_buffer_flush(int fd, writev_buffer_t *buf) -{ - ssize_t count = 0; - int iovcnt = buf->iovcnt; - struct iovec *iov = buf->iov; - - /* - * By definition, flushing the buffers will either be entirely successful, or will fail at some point - * along the way. There is no facility to attempt to retry a writev() system call outside of an EINTR - * errno. Therefore, no matter the outcome, always reset the writev_buffer_t data structure. - */ - buf->iovcnt = 0; - - while (iovcnt > 0) { - ssize_t res; - do { - res = writev(fd, iov, iovcnt); - } while (res == -1 && errno == EINTR); - - if (res <= 0) { - /* - * Any unflushed data is lost (this would be a good place to add a counter for how many times - * this occurs and another count for how much data is lost). - * - * Note that if writev() returns a 0, this logic considers it an error. - */ - return -1; - } - - count += res; - - while (res > 0) { - size_t iov_len = iov->iov_len; - size_t from_this = MIN((size_t)res, iov_len); - res -= from_this; - iov_len -= from_this; - - if (iov_len == 0) { - iov++; - iovcnt--; - /* continue, res still > 0 */ - } else { - iov->iov_len = iov_len; - iov->iov_base += from_this; - /* break, res is 0 */ - } - } - } - - return count; -} - - -ssize_t writev_buffer_append_segment(int fd, writev_buffer_t *buf, const void *data, ssize_t len) +ssize_t writev_buffer_append_segment(int fd, writev_iov_t *buf, const void *data, ssize_t len) { if (data == NULL) return 1; - if (buf->iovcnt == WRITEV_BUFFER_N_IOV && writev_buffer_flush(fd, buf) < 0) + if (buf->iovcnt == buf->max_iovcnt && writev_all(fd, buf) < 0) return -1; if (len > 0) { @@ -693,12 +635,12 @@ ssize_t writev_buffer_append_segment(int fd, writev_buffer_t *buf, const void *d return 1; } -ssize_t writev_buffer_append_segment_no_flush(writev_buffer_t *buf, const void *data, ssize_t len) +ssize_t writev_buffer_append_segment_no_flush(writev_iov_t *buf, const void *data, ssize_t len) { if (data == NULL) return 1; - if (buf->iovcnt == WRITEV_BUFFER_N_IOV) + if (buf->iovcnt == buf->max_iovcnt) return -1; if (len > 0) { diff --git a/src/ctr_stdio.c b/src/ctr_stdio.c index 5d7eb5e7..bb82737f 100644 --- a/src/ctr_stdio.c +++ b/src/ctr_stdio.c @@ -112,12 +112,7 @@ static void drain_log_buffers(stdpipe_t pipe) static bool read_stdio(int fd, stdpipe_t pipe, gboolean *eof) { - /* We use two extra bytes. One at the start, which we don't read into, instead - we use that for marking the pipe when we write to the attached socket. - One at the end to guarantee a null-terminated buffer for journald logging*/ - - char real_buf[STDIO_BUF_SIZE + 2]; - char *buf = real_buf + 1; + char buf[STDIO_BUF_SIZE]; ssize_t num_read = 0; if (eof) @@ -143,15 +138,11 @@ static bool read_stdio(int fd, stdpipe_t pipe, gboolean *eof) nwarnf("stdio_input read failed: %m"); return false; } else { - // Always null terminate the buffer, just in case. - buf[num_read] = '\0'; - bool written = write_to_logs(pipe, buf, num_read); if (!written) return false; - real_buf[0] = pipe; - write_back_to_remote_consoles(real_buf, num_read + 1); + write_back_to_remote_consoles(pipe, buf, num_read); return true; } } diff --git a/src/utils.c b/src/utils.c index 1fb5ab0b..b535e29d 100644 --- a/src/utils.c +++ b/src/utils.c @@ -62,9 +62,9 @@ static void get_signal_descriptor_mask(sigset_t *set) sigprocmask(SIG_BLOCK, set, NULL); } -ssize_t write_all(int fd, const void *buf, size_t count) +ssize_t write_all(int fd, const void *buf, size_t buflen) { - size_t remaining = count; + size_t remaining = buflen; const char *p = buf; ssize_t res; @@ -80,6 +80,58 @@ ssize_t write_all(int fd, const void *buf, size_t count) p += res; } + return buflen; +} + +ssize_t writev_all(int fd, writev_iov_t *buf) +{ + ssize_t count = 0; + size_t iovcnt = buf->iovcnt; + struct iovec *iov = buf->iov; + + /* + * By definition, flushing the buffers will either be entirely successful, or will fail at some point + * along the way. There is no facility to attempt to retry a writev() system call outside of an retryable + * errno. Therefore, no matter the outcome, always reset the given writev_iov_t data structure. + */ + buf->iovcnt = 0; + + while (iovcnt > 0) { + ssize_t res; + do { + res = writev(fd, iov, iovcnt); + } while (res == -1 && retryable_error(errno)); + + if (res <= 0) { + /* + * Any unflushed data is lost (this would be a good place to add a counter for how many times + * this occurs and another count for how much data is lost). + * + * Note that if writev() returns a 0, this logic considers it an error. + */ + return -1; + } + + count += res; + + while (res > 0) { + size_t iov_len = iov->iov_len; + size_t from_this = MIN((size_t)res, iov_len); + res -= from_this; + iov_len -= from_this; + + if (iov_len == 0) { + iov++; + iovcnt--; + /* continue, res still > 0 */ + } else { + iov->iov_len = iov_len; + iov->iov_base += from_this; + /* break, res is 0 */ + } + } + } + return count; } diff --git a/src/utils.h b/src/utils.h index 00acb9c4..fced4f85 100644 --- a/src/utils.h +++ b/src/utils.h @@ -255,7 +255,13 @@ static inline void hashtable_free_cleanup(GHashTable **tbl) #define _cleanup_gerror_ _cleanup_(gerror_free_cleanup) -ssize_t write_all(int fd, const void *buf, size_t count); +ssize_t write_all(int fd, const void *buf, size_t buflen); +typedef struct { + size_t iovcnt; + size_t max_iovcnt; + struct iovec *iov; +} writev_iov_t; +ssize_t writev_all(int fd, writev_iov_t *iov); int set_subreaper(gboolean enabled); From fde5ff1dd53a51cbe57414f86f71ed95e53dfae7 Mon Sep 17 00:00:00 2001 From: Peter Portante Date: Wed, 8 Oct 2025 01:23:10 -0400 Subject: [PATCH 2/4] Add an easy cleanup for tests The `make clean` target now cleans up the `/tmp` area a bit. It leaves around the cache of the `busybox` image to avoid fetching it too often. Signed-off-by: Peter Portante --- Makefile | 1 + test/Makefile | 3 +++ test/test_helper.bash | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 test/Makefile diff --git a/Makefile b/Makefile index 4ccf7f91..5c1acb97 100644 --- a/Makefile +++ b/Makefile @@ -106,6 +106,7 @@ docs: .PHONY: clean clean: rm -rf bin/ src/*.o src/*.gcno src/*.gcda *.gcov + $(MAKE) -C test clean $(MAKE) -C docs clean .PHONY: install install.bin install.crio install.podman podman crio diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 00000000..29f0a3df --- /dev/null +++ b/test/Makefile @@ -0,0 +1,3 @@ +.PHONY: clean +clean: + rm -rf /tmp/conmon-test-* /tmp/conmon-term.* diff --git a/test/test_helper.bash b/test/test_helper.bash index 1880f5b0..993a54c5 100644 --- a/test/test_helper.bash +++ b/test/test_helper.bash @@ -25,8 +25,8 @@ RUNTIME_BINARY="${RUNTIME_BINARY:-/usr/bin/runc}" # UBI10-micro container image for test rootfs UBI10_MICRO_IMAGE="registry.access.redhat.com/ubi10/ubi-micro:latest" -ROOTFS_CACHE_DIR="/tmp/conmon-test-images" -ROOTFS_CACHE_MARKER="/tmp/conmon-test-images/.ubi10-micro-cached" +ROOTFS_CACHE_DIR="/tmp/conmon-images" +ROOTFS_CACHE_MARKER="/tmp/conmon-images/.ubi10-micro-cached" VALID_PATH="/tmp" INVALID_PATH="/not/a/path" From d01f56c6fe74c82118956f0e7228cb86bc25dfa8 Mon Sep 17 00:00:00 2001 From: Peter Portante Date: Wed, 8 Oct 2025 01:35:55 -0400 Subject: [PATCH 3/4] Use `size_t` with name `buflen` consistently A few of the parameters were named `num_read` "leaking" the use it was named for in a calling function. We use `buflen` with `buf` more consistently, and use `size_t` were possible since we don't need or want to pass a negative value for the size of a buffer. Signed-off-by: Peter Portante --- src/conmon.c | 8 ++++---- src/ctr_logging.c | 36 ++++++++++++++++++------------------ src/ctr_logging.h | 2 +- src/ctr_stdio.c | 4 ++-- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/conmon.c b/src/conmon.c index 1b225d41..d6fce20d 100644 --- a/src/conmon.c +++ b/src/conmon.c @@ -42,7 +42,7 @@ int main(int argc, char *argv[]) umask(DEFAULT_UMASK); _cleanup_gerror_ GError *err = NULL; char buf[BUF_SIZE]; - int num_read; + ssize_t num_read; _cleanup_close_ int dev_null_r_cleanup = -1; _cleanup_close_ int dev_null_w_cleanup = -1; _cleanup_close_ int dummyfd = -1; @@ -66,7 +66,7 @@ int main(int argc, char *argv[]) /* Block for an initial write to the start pipe before spawning any children or exiting, to ensure the parent can put us in the right cgroup. */ - num_read = read(start_pipe_fd, buf, BUF_SIZE); + num_read = read(start_pipe_fd, buf, sizeof(buf)); if (num_read < 0) { pexit("start-pipe read failed"); } @@ -282,7 +282,7 @@ int main(int argc, char *argv[]) if (opt_attach) { if (start_pipe_fd > 0) { ndebug("exec with attach is waiting for start message from parent"); - num_read = read(start_pipe_fd, buf, BUF_SIZE); + num_read = read(start_pipe_fd, buf, sizeof(buf)); if (num_read < 0) { _pexit("start-pipe read failed"); } @@ -370,7 +370,7 @@ int main(int argc, char *argv[]) * Read from container stderr for any error and send it to parent * We send -1 as pid to signal to parent that create container has failed. */ - num_read = read(mainfd_stderr, buf, BUF_SIZE - 1); + num_read = read(mainfd_stderr, buf, sizeof(buf) - 1); const char *error_msg = NULL; if (num_read > 0) { buf[num_read] = '\0'; diff --git a/src/ctr_logging.c b/src/ctr_logging.c index 730e7691..8ce251db 100644 --- a/src/ctr_logging.c +++ b/src/ctr_logging.c @@ -81,14 +81,14 @@ static size_t syslog_identifier_len; static void parse_log_path(char *log_config); static const char *stdpipe_name(stdpipe_t pipe); -static int write_journald(int pipe, char *buf, ssize_t num_read); -static int write_k8s_log(stdpipe_t pipe, const char *buf, ssize_t buflen); -static bool get_line_len(ptrdiff_t *line_len, const char *buf, ssize_t buflen); -static ssize_t writev_buffer_append_segment(int fd, writev_iov_t *buf, const void *data, ssize_t len); -static ssize_t writev_buffer_append_segment_no_flush(writev_iov_t *buf, const void *data, ssize_t len); -static void set_k8s_timestamp(char *buf, ssize_t buflen, const char *pipename); +static int write_journald(int pipe, char *buf, size_t buflen); +static int write_k8s_log(stdpipe_t pipe, const char *buf, size_t buflen); +static bool get_line_len(ptrdiff_t *line_len, const char *buf, size_t buflen); +static ssize_t writev_buffer_append_segment(int fd, writev_iov_t *buf, const void *data, size_t len); +static ssize_t writev_buffer_append_segment_no_flush(writev_iov_t *buf, const void *data, size_t len); +static void set_k8s_timestamp(char *buf, size_t buflen, const char *pipename); static void reopen_k8s_file(void); -static int parse_priority_prefix(const char *buf, ssize_t buflen, int *priority, const char **message_start); +static int parse_priority_prefix(const char *buf, size_t buflen, int *priority, const char **message_start); gboolean logging_is_passthrough(void) @@ -292,13 +292,13 @@ static void parse_log_path(char *log_config) } /* write container output to all logs the user defined */ -bool write_to_logs(stdpipe_t pipe, char *buf, ssize_t num_read) +bool write_to_logs(stdpipe_t pipe, char *buf, size_t buflen) { - if (use_k8s_logging && write_k8s_log(pipe, buf, num_read) < 0) { + if (use_k8s_logging && write_k8s_log(pipe, buf, buflen) < 0) { nwarn("write_k8s_log failed"); return G_SOURCE_CONTINUE; } - if (use_journald_logging && write_journald(pipe, buf, num_read) < 0) { + if (use_journald_logging && write_journald(pipe, buf, buflen) < 0) { nwarn("write_journald failed"); return G_SOURCE_CONTINUE; } @@ -316,7 +316,7 @@ bool write_to_logs(stdpipe_t pipe, char *buf, ssize_t num_read) * 0 if no valid priority prefix was found * -1 on error (invalid parameters) */ -static int parse_priority_prefix(const char *buf, ssize_t buflen, int *priority, const char **message_start) +static int parse_priority_prefix(const char *buf, size_t buflen, int *priority, const char **message_start) { if (!buf || !priority || !message_start) { return -1; @@ -353,7 +353,7 @@ static int parse_priority_prefix(const char *buf, ssize_t buflen, int *priority, * otherwise, write with error priority. Partial lines (that don't end in a newline) are buffered * between invocations. A 0 buflen argument forces a buffered partial line to be flushed. */ -static int write_journald(int pipe, char *buf, ssize_t buflen) +static int write_journald(int pipe, char *buf, size_t buflen) { static char stdout_partial_buf[STDIO_BUF_SIZE]; static size_t stdout_partial_buf_len = 0; @@ -479,7 +479,7 @@ static int write_journald(int pipe, char *buf, ssize_t buflen) * not terminated by a newline. A 0 buflen argument forces any buffered partial * line to be finalized with an F-sequence. */ -static int write_k8s_log(stdpipe_t pipe, const char *buf, ssize_t buflen) +static int write_k8s_log(stdpipe_t pipe, const char *buf, size_t buflen) { static bool stdout_has_partial = false; static bool stderr_has_partial = false; @@ -606,7 +606,7 @@ static int write_k8s_log(stdpipe_t pipe, const char *buf, ssize_t buflen) /* Find the end of the line, or alternatively the end of the buffer. * Returns false in the former case (it's a whole line) or true in the latter (it's a partial) */ -static bool get_line_len(ptrdiff_t *line_len, const char *buf, ssize_t buflen) +static bool get_line_len(ptrdiff_t *line_len, const char *buf, size_t buflen) { bool partial = FALSE; const char *line_end = memchr(buf, '\n', buflen); @@ -618,7 +618,7 @@ static bool get_line_len(ptrdiff_t *line_len, const char *buf, ssize_t buflen) return partial; } -ssize_t writev_buffer_append_segment(int fd, writev_iov_t *buf, const void *data, ssize_t len) +ssize_t writev_buffer_append_segment(int fd, writev_iov_t *buf, const void *data, size_t len) { if (data == NULL) return 1; @@ -635,7 +635,7 @@ ssize_t writev_buffer_append_segment(int fd, writev_iov_t *buf, const void *data return 1; } -ssize_t writev_buffer_append_segment_no_flush(writev_iov_t *buf, const void *data, ssize_t len) +ssize_t writev_buffer_append_segment_no_flush(writev_iov_t *buf, const void *data, size_t len) { if (data == NULL) return 1; @@ -668,7 +668,7 @@ static const char *stdpipe_name(stdpipe_t pipe) } /* Generate timestamp string to buf. */ -static void set_k8s_timestamp(char *buf, ssize_t buflen, const char *pipename) +static void set_k8s_timestamp(char *buf, size_t buflen, const char *pipename) { static int tzset_called = 0; @@ -715,7 +715,7 @@ static void set_k8s_timestamp(char *buf, ssize_t buflen, const char *pipename) off_sign, off / 3600, (off % 3600) / 60, pipename); /* Ensure null termination if snprintf output exceeds buffer length. */ - if (len >= buflen && buflen > 0) { + if (len >= (ssize_t)buflen && buflen > 0) { buf[buflen - 1] = '\0'; } } diff --git a/src/ctr_logging.h b/src/ctr_logging.h index 1646276b..de4155a5 100644 --- a/src/ctr_logging.h +++ b/src/ctr_logging.h @@ -6,7 +6,7 @@ #include /* bool */ void reopen_log_files(void); -bool write_to_logs(stdpipe_t pipe, char *buf, ssize_t num_read); +bool write_to_logs(stdpipe_t pipe, char *buf, size_t buflen); void configure_log_drivers(gchar **log_drivers, int64_t log_size_max_, int64_t log_global_size_max_, char *cuuid_, char *name_, char *tag, gchar **labels); void sync_logs(void); diff --git a/src/ctr_stdio.c b/src/ctr_stdio.c index bb82737f..ff70154c 100644 --- a/src/ctr_stdio.c +++ b/src/ctr_stdio.c @@ -138,11 +138,11 @@ static bool read_stdio(int fd, stdpipe_t pipe, gboolean *eof) nwarnf("stdio_input read failed: %m"); return false; } else { - bool written = write_to_logs(pipe, buf, num_read); + bool written = write_to_logs(pipe, buf, (size_t)num_read); if (!written) return false; - write_back_to_remote_consoles(pipe, buf, num_read); + write_back_to_remote_consoles(pipe, buf, (size_t)num_read); return true; } } From 4db9fa45d3aad3b7be3e2d1c9d6d004bd192d67b Mon Sep 17 00:00:00 2001 From: Peter Portante Date: Mon, 29 Sep 2025 14:55:43 -0400 Subject: [PATCH 4/4] Use the actual pipe size instead of 8,192 The default for linux pipes is typically something much larger than 8,192 bytes. We take advantage of this to fact to avoid multiple rounds of reading from a full pipe. The constant, `STDIO_BUF_SIZE` is removed in favor of `fcntl(F_GETPIPE_SZ)` calls for pipes / fifos, an adding `DEF_STDIO_BUF_SIZE` for the case where `stdout` is not a PIPE (perhaps character special file) using a 64K buffer to consume more data per system when possible. Signed-off-by: Peter Portante --- src/config.h | 2 +- src/conmon.c | 15 +++++++++++++++ src/ctr_logging.c | 15 ++++++++++++--- src/ctr_stdio.c | 5 +++-- src/ctrl.c | 12 ++++++++++++ src/globals.c | 3 +++ src/globals.h | 3 +++ 7 files changed, 49 insertions(+), 6 deletions(-) diff --git a/src/config.h b/src/config.h index 4277e5cc..e472e691 100644 --- a/src/config.h +++ b/src/config.h @@ -3,7 +3,7 @@ #define CONFIG_H #define BUF_SIZE 8192 -#define STDIO_BUF_SIZE 8192 +#define DEF_STDOUT_BUF_SIZE 65536 #define CONN_SOCK_BUF_SIZE 32768 #define DEFAULT_SOCKET_PATH "/var/run/crio" #define WIN_RESIZE_EVENT 1 diff --git a/src/conmon.c b/src/conmon.c index d6fce20d..980eb4f5 100644 --- a/src/conmon.c +++ b/src/conmon.c @@ -165,6 +165,10 @@ int main(int argc, char *argv[]) pexit("Failed to create !terminal stdin pipe"); mainfd_stdin = fds[1]; + ret = fcntl(mainfd_stdin, F_GETPIPE_SZ); + if (ret < 0) + pexit("main stdin pipe size determination failed"); + mainfd_stdin_size = (size_t)ret; workerfd_stdin = fds[0]; if (g_unix_set_fd_nonblocking(mainfd_stdin, TRUE, NULL) == FALSE) @@ -175,6 +179,10 @@ int main(int argc, char *argv[]) pexit("Failed to create !terminal stdout pipe"); mainfd_stdout = fds[0]; + ret = fcntl(mainfd_stdout, F_GETPIPE_SZ); + if (ret < 0) + pexit("main stdout pipe size determination failed"); + mainfd_stdout_size = (size_t)ret; workerfd_stdout = fds[1]; } @@ -194,6 +202,13 @@ int main(int argc, char *argv[]) pexit("Failed to create stderr pipe"); mainfd_stderr = fds[0]; + ret = fcntl(mainfd_stderr, F_GETPIPE_SZ); + if (ret < 0) + pexit("main stderr pipe size determination failed"); + mainfd_stderr_size = (size_t)ret; + if ((mainfd_stdout >= 0) && (mainfd_stderr_size != mainfd_stdout_size)) { + nwarn("main stderr and stdout pipe sizes don't match"); + } workerfd_stderr = fds[1]; GPtrArray *runtime_argv = configure_runtime_args(csname); diff --git a/src/ctr_logging.c b/src/ctr_logging.c index 8ce251db..eed3009c 100644 --- a/src/ctr_logging.c +++ b/src/ctr_logging.c @@ -1,5 +1,6 @@ #define _GNU_SOURCE #include "ctr_logging.h" +#include "globals.h" #include "cli.h" #include "config.h" #include @@ -355,10 +356,18 @@ static int parse_priority_prefix(const char *buf, size_t buflen, int *priority, */ static int write_journald(int pipe, char *buf, size_t buflen) { - static char stdout_partial_buf[STDIO_BUF_SIZE]; + static char *stdout_partial_buf = NULL; static size_t stdout_partial_buf_len = 0; - static char stderr_partial_buf[STDIO_BUF_SIZE]; + static char *stderr_partial_buf = NULL; static size_t stderr_partial_buf_len = 0; + size_t buf_size = (pipe == STDOUT_PIPE ? mainfd_stdout_size : mainfd_stderr_size); + + if (stdout_partial_buf == NULL) { + stdout_partial_buf = g_malloc(mainfd_stdout_size); + } + if (stderr_partial_buf == NULL) { + stderr_partial_buf = g_malloc(mainfd_stderr_size); + } char *partial_buf; size_t *partial_buf_len; @@ -388,7 +397,7 @@ static int write_journald(int pipe, char *buf, size_t buflen) /* If this is a partial line, and we have capacity to buffer it, buffer it and return. * The capacity of the partial_buf is one less than its size so that we can always add * a null terminating char later */ - if (buflen && partial && ((unsigned long)line_len < (STDIO_BUF_SIZE - *partial_buf_len))) { + if (buflen && partial && ((unsigned long)line_len < (buf_size - *partial_buf_len))) { memcpy(partial_buf + *partial_buf_len, buf, line_len); *partial_buf_len += line_len; return 0; diff --git a/src/ctr_stdio.c b/src/ctr_stdio.c index ff70154c..29d343ee 100644 --- a/src/ctr_stdio.c +++ b/src/ctr_stdio.c @@ -112,13 +112,14 @@ static void drain_log_buffers(stdpipe_t pipe) static bool read_stdio(int fd, stdpipe_t pipe, gboolean *eof) { - char buf[STDIO_BUF_SIZE]; + size_t buf_size = ((pipe == STDOUT_PIPE) ? mainfd_stdout_size : mainfd_stderr_size); + char *buf = alloca(buf_size); ssize_t num_read = 0; if (eof) *eof = false; - num_read = read(fd, buf, STDIO_BUF_SIZE); + num_read = read(fd, buf, buf_size); if (num_read == 0) { if (eof) *eof = true; diff --git a/src/ctrl.c b/src/ctrl.c index c525b3ae..5170b296 100644 --- a/src/ctrl.c +++ b/src/ctrl.c @@ -72,6 +72,18 @@ gboolean terminal_accept_cb(int fd, G_GNUC_UNUSED GIOCondition condition, G_GNUC mainfd_stdout = dup(console.fd); if (mainfd_stdout < 0) pexit("Failed to dup console file descriptor"); + struct stat stat_s = {0}; + int ret = fstat(mainfd_stdout, &stat_s); + if (ret < 0) + pexit("main stdout pipe fstat() failed"); + if (S_ISFIFO(stat_s.st_mode)) { + ret = fcntl(mainfd_stdout, F_GETPIPE_SZ); + if (ret < 0) + pexit("main stdout pipe size determination failed"); + mainfd_stdout_size = (size_t)ret; + } else { + mainfd_stdout_size = DEF_STDOUT_BUF_SIZE; + } /* Now that we have a fd to the tty, make sure we handle any pending data * that was already buffered. */ diff --git a/src/globals.c b/src/globals.c index 0d052a89..ebe76c4f 100644 --- a/src/globals.c +++ b/src/globals.c @@ -4,8 +4,11 @@ int runtime_status = -1; int container_status = -1; int mainfd_stdin = -1; +size_t mainfd_stdin_size = 0; int mainfd_stdout = -1; +size_t mainfd_stdout_size = 0; int mainfd_stderr = -1; +size_t mainfd_stderr_size = 0; int attach_socket_fd = -1; int console_socket_fd = -1; diff --git a/src/globals.h b/src/globals.h index 28292993..6ac8e92d 100644 --- a/src/globals.h +++ b/src/globals.h @@ -8,8 +8,11 @@ extern int runtime_status; extern int container_status; extern int mainfd_stdin; +extern size_t mainfd_stdin_size; extern int mainfd_stdout; +extern size_t mainfd_stdout_size; extern int mainfd_stderr; +extern size_t mainfd_stderr_size; extern int attach_socket_fd; extern int console_socket_fd;