From 1be420265f87c15a31c0d9bca2d94107b1266754 Mon Sep 17 00:00:00 2001 From: Cal Heldenbrand Date: Thu, 23 Oct 2025 11:26:01 -0500 Subject: [PATCH 1/6] Reduce apache threads for easier debugging --- docker/httpd.conf | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docker/httpd.conf b/docker/httpd.conf index dc6083f..e22eeba 100644 --- a/docker/httpd.conf +++ b/docker/httpd.conf @@ -15,7 +15,16 @@ DocumentRoot "/var/www/html" # Allow open access: Require all granted +# Make apache chill out on the event MPM settings so that +# we don't have such a huge mess of threads in gdb to deal with MaxClients 1 +ThreadsPerChild 1 +MaxRequestWorkers 1 +MinSpareThreads 1 +MaxSpareThreads 1 +MaxConnectionsPerChild 0 +ServerLimit 1 + CoreDumpDirectory /tmp RubyHandlerDeclare TEST RubyHandlerModule TEST "/var/www/html/test.rb" From d4a8fcb0a7b865367c8b2c8983dfbb064e5af230 Mon Sep 17 00:00:00 2001 From: Cal Heldenbrand Date: Fri, 24 Oct 2025 09:52:04 -0500 Subject: [PATCH 2/6] Setting up OpenAI's codex CLI agent --- Dockerfile.codex | 107 ++++++++++++++++++++++++++++++++++++++++++ docker/httpd-gdb-loop | 88 ++++++++++++++++++++++++++++++++++ script/docker_codex | 26 ++++++++++ 3 files changed, 221 insertions(+) create mode 100644 Dockerfile.codex create mode 100755 docker/httpd-gdb-loop create mode 100755 script/docker_codex diff --git a/Dockerfile.codex b/Dockerfile.codex new file mode 100644 index 0000000..d15509a --- /dev/null +++ b/Dockerfile.codex @@ -0,0 +1,107 @@ +# This builds a docker image suitable for running OpenAI's codex CLI +FROM oraclelinux:8 + +RUN dnf install -y oraclelinux-release-el8 oracle-epel-release-el8 \ + && dnf --enablerepo=ol8_codeready_builder install -y \ + apr-devel \ + apr-util \ + apr-util-devel \ + autoconf \ + automake \ + bison \ + bzip2 \ + cmake3 \ + curl \ + cyrus-sasl \ + cyrus-sasl-devel \ + flex \ + gcc \ + gcc-c++ \ + gdb \ + git \ + gpg \ + httpd \ + httpd-devel \ + libffi-devel \ + libtool \ + libyaml \ + libyaml-devel \ + 
openssl-devel \ + patch \ + readline-devel \ + ruby \ + sqlite-devel \ + make \ + redhat-lsb \ + unzip \ + zlib-devel \ + && dnf module enable -y nodejs:22 \ + && dnf install -y nodejs nodejs-devel npm \ + && npm install -g @openai/codex + +# Import GPG key for RVM +RUN gpg \ + --keyserver keyserver.ubuntu.com \ + --recv-keys \ + 409B6B1796C275462A1703113804BB82D39DC0E3 \ + 7D2BAF1CF37B13E2069D6956105BD0E739499BDB + +# Install RVM system wide +RUN curl -sSL https://get.rvm.io | bash -s stable + +# Set our shell to a bash full login environment to pull in +# RVM's profile in all RUN instructions below. +# (Requires Docker 1.12) +SHELL ["/bin/bash", "-l", "-c"] + +# In case we missed any package requirements, this installs them +RUN rvm requirements + +# Pick your ruby version here +# Segfaults in rb_protect() +RUN rvm install ruby-3.2.3 --disable-binary +# Works +#RUN rvm install ruby-2.7.5 +# Same segfault as 3.2.3 +#RUN rvm install ruby-3.0.3 + +# Setup our libruby.so dir in ld.so.conf +RUN rvm config-get libdir > /etc/ld.so.conf.d/ruby.conf && ldconfig + +WORKDIR /usr/src/mod_ruby + +COPY . /usr/src/mod_ruby + +# Pulls in the RVM environment and installed ruby +RUN cmake3 .
&& make -j4 && make install + +# Remove some junk that the httpd package installs +RUN rm -f /etc/httpd/conf.d/welcome.conf /etc/httpd/conf.modules.d/00-systemd.conf + +# Manually copy some files I couldn't figure out with the CMake system +RUN cp -a config/mod_ruby.conf /etc/httpd/conf.modules.d/ + +# librhtml.so +RUN cp -a lib/* $(rvm config-get libdir) && ldconfig + +COPY docker/index.html /var/www/html/index.html +COPY docker/*.rb /var/www/html/ +COPY docker/*.cgi /var/www/cgi-bin/ +COPY docker/httpd.conf /etc/httpd/conf/httpd.conf +COPY docker/gdb.input /gdb.input +COPY docker/httpd-gdb /httpd-gdb +COPY docker/httpd-gdb-loop /httpd-gdb-loop + +# Force apache logs to docker console logs +#RUN ln -sf /dev/console /var/log/httpd/access_log \ +# && ln -sf /dev/console /var/log/httpd/error_log + +# Graceful shutdown signal for apache +# (Requires Docker > 1.11) +# Note: gdb is set to trap SIGWINCH, so this is for +# alternate uses with less debugging +#STOPSIGNAL SIGWINCH + +# If you want a simpler image without gdb... +#CMD ["/usr/sbin/httpd", "-D", "FOREGROUND"] +CMD ["/httpd-gdb"] diff --git a/docker/httpd-gdb-loop b/docker/httpd-gdb-loop new file mode 100755 index 0000000..c188008 --- /dev/null +++ b/docker/httpd-gdb-loop @@ -0,0 +1,88 @@ +#!/bin/bash +# This is similar to httpd-gdb, except it's more hands-off and +# better suited for automated testing. If apache crashes the +# stack trace will be written to /var/log/httpd/gdb_backtrace.txt +# and then apache is restarted. Each new crash will delete the +# backtrace log and dump the fresh backtrace into it. 
+#set -euo pipefail + +# Config +TRACE_FILE="${TRACE_FILE:-/var/log/httpd/gdb_backtrace.txt}" + +mkdir -p "$(dirname "$TRACE_FILE")" + +# Clean previous trace each run +rm -f "$TRACE_FILE" + +# On INT/TERM kill our child gdb/tee and exit, so the restart loop really stops +trap 'pkill -P $$ || true; exit 1' INT TERM + +while :; do + ts="$(date -Is)" + tmp="${TRACE_FILE}.tmp" + + echo "[$ts] starting httpd" + + rm -f /var/run/httpd/httpd.pid + + /usr/sbin/httpd 1>>"$tmp" 2>>"$tmp" + + ret=$? + if [[ "$ret" != "0" ]]; then + { + echo "Failed to start up Apache:" + echo + } >>"$tmp" + mv -f "$tmp" "$TRACE_FILE" + # longer sleep time on big time fails + sleep 10 + continue + fi + + sleep 2 + pid=$(ps ax |grep /usr/sbin/httpd |grep -v grep | grep -v defunct | awk '{print $1}' |tail -1) + echo "httpd fork pid: $pid" + + # If apache started up successfully and the previous trace file + # contains a fail message, delete the trace file. We want to keep + # backtraces intact for observability though. (-s: file may not exist yet) + if grep -qs "Failed to start up Apache" "$TRACE_FILE"; then + echo "Deleting trace file" + rm -f "$TRACE_FILE" + fi + + #-ex "set detach-on-fork on" \ + #-ex "handle SIGPIPE nostop noprint pass" \ + #-ex "handle SIGSEGV stop print nopass" \ + #-ex "set follow-fork-mode child" \ + gdb -p "$pid" -q --batch \ + -ex "set pagination off" \ + -ex "set confirm off" \ + -ex "handle SIGPIPE nostop noprint pass" \ + -ex "handle SIGSEGV stop print nopass" \ + -ex "continue" \ + -ex "echo \n===== THREAD BACKTRACE =====\n" \ + -ex "thread apply all bt full" \ + -ex "echo \n===== REGISTERS =====\n" \ + -ex "info registers" \ + 2>&1 | tee "$tmp" + + # If it crashed, keep the trace; else discard. + if grep -q "received signal SIGSEGV" "$tmp"; then + { + echo "===== TIMESTAMP =====" + echo "$ts" + } >>"$tmp" + mv -f "$tmp" "$TRACE_FILE" + echo "segfault detected. backtrace saved to: $TRACE_FILE" + else + cat "$tmp" + rm -f "$tmp" + echo "no segfault detected. restarting."
+ fi + + ps ax |grep /usr/sbin/httpd |grep -v grep | grep -v defunct | awk '{print $1}' | xargs kill -9 + + sleep 2 +done + diff --git a/script/docker_codex b/script/docker_codex new file mode 100755 index 0000000..625b041 --- /dev/null +++ b/script/docker_codex @@ -0,0 +1,26 @@ +#!/bin/bash +root=$( readlink -f $( dirname $( readlink -f $0 ) )/.. ) +docker build -f $root/Dockerfile.codex -t mod_ruby_codex $root + +docker rm -f mod_ruby_codex_run_container +# Change 8080 here if you have a local conflict with it +docker run \ + --name=mod_ruby_codex_run_container \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + -v $root:/usr/src/mod_ruby \ + -v $HOME/.codex:/root/.codex \ + -d \ + -p 8080:80 \ + mod_ruby_codex \ + /bin/sleep infinity + +# Start up Apache in the background. This script will loop +# restarting Apache when it crashes. Stack traces are logged +# for the Codex agent to inspect. +docker exec -d mod_ruby_codex_run_container /httpd-gdb-loop +#docker exec -ti mod_ruby_codex_run_container /httpd-gdb-loop + +# Run the Codex agent with full access and web search +docker exec -ti mod_ruby_codex_run_container codex --sandbox danger-full-access --search --cd /usr/src/mod_ruby + From be1feb95500179b7d4f960ea7df8518011670383 Mon Sep 17 00:00:00 2001 From: Cal Heldenbrand Date: Fri, 24 Oct 2025 09:52:52 -0500 Subject: [PATCH 3/6] AGENTS.md instructions for LLM agents --- AGENTS.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..c581405 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,31 @@ +# LLM Agent Instructions + +* To compile `ModRuby`, run these commands: + * `rm -f /usr/src/mod_ruby/CMakeCache.txt` + * `rm -Rf /usr/src/mod_ruby/CMakeFiles` + * `cmake3 /usr/src/mod_ruby` + * `make -j4` + * `make install` + * If any compilation commands fail, the patch failed. 
+* To test `ModRuby`: + * Reload Apache by running `killall httpd` -- it will automatically restart and load + the new `mod_ruby.so` shared object that was previously compiled. + * Use curl to test `ModRuby` with this: `curl -m1 localhost` + * It should output the string `Ruby check_access()\nHi there from ruby` + * If curl times out and Apache segfaults, the patch failed + * To inspect the backtrace, look at the file `/var/log/httpd/gdb_backtrace.txt` + * After each crash, Apache is restarted + * Each new crash will delete the previous contents of `/var/log/httpd/gdb_backtrace.txt` + * If Apache completely fails to start up due to a symbol linking problem or other error, + the output from the server will be in `/var/log/httpd/error_log` and it will + continue to try and restart itself every 10 seconds. The log file might be large, + so something like `tail -20 /var/log/httpd/error_log` is a safer method to inspect + this file. +* codex is running inside an Oracle Linux 8 Docker container, running as the root user. + If you need tools or packages installed, you may use `dnf install` for distro packages + or use other methods. +* NodeJS 22 is installed and you may install npm packages +* Python 3.12 is installed and you may construct Python utility scripts and execute them. +* If you want to inspect the Ruby source code, it is installed in `/usr/local/rvm/src/ruby-3.2.3/` + * If you want to inspect the Ruby C++ headers, they are in `/usr/local/rvm/src/ruby-3.2.3/include/` +* If you want to inspect the Ruby binary installation, it is installed in `/usr/local/rvm/rubies/ruby-3.2.3/` From d5e0fafb605516efe6a7377d8d6a3db53fa116ad Mon Sep 17 00:00:00 2001 From: Cal Heldenbrand Date: Fri, 24 Oct 2025 09:53:59 -0500 Subject: [PATCH 4/6] Ruby 3.2 port. READ AI CONTENT CAUTION BELOW: The differences between Ruby 2 and Ruby 3 are significant in terms of thread safety and memory safety. I (Cal) have not worked with the Ruby SDK very much in the past. 
There is not much documentation for C++ embedders to use, especially concerning the newer Ruby versions. So I decided to try out OpenAI's Codex CLI. I gave it access to all of Ruby 3.2's source and headers, along with a mechanism to compile and test ModRuby on a live Apache server. I showed it the segfault back trace in rb_protect() and told it to resolve the issue. It spent a total of around an hour doing web searches, reading the source and trying things out. It made mistakes, spotted its own errors, then recompiled and retested until it had success. I've scanned over this commit and I don't see any obvious problems with it. There were two main issues here for the Ruby 3 port: 1) You can't call rb_protect() without a global VM lock 2) You can't get the global VM lock from a thread outside of Ruby (IE: In an Apache MPM thread) Fixing #1 caused #2. The RubyDispatcher class shifts interaction with Ruby to an interface that manages the global VM. rb_protect() is wrapped in a GVL so that native C++ functions can interact with Ruby objects and methods. --- src/module.cpp | 571 ++++++++++++++++++++++++++++++++++++------------- src/ruby.cpp | 99 ++++++++- 2 files changed, 515 insertions(+), 155 deletions(-) diff --git a/src/module.cpp b/src/module.cpp index e27f86f..176e864 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -2,9 +2,17 @@ #include #include +#include +#include +#include #include -#include +#include +#include #include +#include +#include +#include +#include #include "ruby.hpp" #include "request.h" @@ -23,6 +31,9 @@ #include "config.h" #include "module.hpp" +#include + +extern "C" int ruby_thread_has_gvl_p(void); #if AP_SERVER_MINORVERSION_NUMBER >= 4 APLOG_USE_MODULE(); @@ -32,6 +43,199 @@ typedef VALUE (*fn)(...); using namespace std; +namespace +{ +// RubyDispatcher owns the embedded Ruby VM and runs all Ruby-facing work on +// a single dedicated OS thread. 
Apache worker threads hand off callable jobs +// so that Ruby APIs are always invoked from a Ruby-managed thread with the +// GVL held. +class RubyDispatcher +{ +public: + static RubyDispatcher& instance() + { + static RubyDispatcher dispatcher; + return dispatcher; + } + + // Launches the dispatcher thread and blocks until the Ruby VM is ready. + void start() + { + std::unique_lock lock(mutex_); + + if (thread_started_) + { + ready_cv_.wait(lock, [this]() { return ready_; }); + return; + } + + shutting_down_ = false; + ready_ = false; + thread_exception_ = nullptr; + worker_thread_ = std::thread(&RubyDispatcher::run, this); + thread_started_ = true; + + ready_cv_.wait(lock, [this]() { return ready_; }); + + if (thread_exception_) + { + std::rethrow_exception(thread_exception_); + } + } + + // Indicates whether the dispatcher thread is currently running. + bool is_started() const + { + std::lock_guard lock(mutex_); + return thread_started_; + } + + // Signals the dispatcher thread to drain pending work and shut down the VM. + void stop() + { + std::unique_lock lock(mutex_); + + if (!thread_started_) + { + return; + } + + shutting_down_ = true; + cv_.notify_all(); + lock.unlock(); + + if (worker_thread_.joinable()) + { + worker_thread_.join(); + } + + lock.lock(); + jobs_.clear(); + thread_started_ = false; + ready_ = false; + thread_exception_ = nullptr; + } + + // Synchronously executes a job on the Ruby dispatcher thread. 
+ int execute(std::function task) + { + if (ruby_thread_has_gvl_p()) + { + return task(); + } + + std::unique_ptr job(new Job()); + auto future = job->promise.get_future(); + job->task = std::move(task); + + { + std::lock_guard lock(mutex_); + + if (!thread_started_) + { + throw std::runtime_error("Ruby dispatcher thread not started"); + } + + jobs_.push_back(std::move(job)); + } + + cv_.notify_one(); + + return future.get(); + } + +private: + struct Job + { + std::function task; + std::promise promise; + }; + + RubyDispatcher() = default; + + // Dispatcher main loop: owns the Ruby VM and processes queued jobs. + void run() + { + try + { + ruby::startup("ModRuby Ruby VM"); + + { + std::lock_guard lock(mutex_); + ready_ = true; + ready_cv_.notify_all(); + } + + while (true) + { + std::unique_ptr job; + + { + std::unique_lock lock(mutex_); + cv_.wait(lock, [this]() { + return shutting_down_ || !jobs_.empty(); + }); + + if (shutting_down_ && jobs_.empty()) + { + break; + } + + job = std::move(jobs_.front()); + jobs_.pop_front(); + } + + try + { + int result = job->task(); + job->promise.set_value(result); + } + catch (...) + { + job->promise.set_exception(std::current_exception()); + } + } + + ruby::shutdown(); + } + catch (...) + { + std::lock_guard lock(mutex_); + thread_exception_ = std::current_exception(); + ready_ = true; + ready_cv_.notify_all(); + } + } + + mutable std::mutex mutex_; + std::condition_variable cv_; + std::condition_variable ready_cv_; + std::deque> jobs_; + std::thread worker_thread_; + bool thread_started_ = false; + bool ready_ = false; + bool shutting_down_ = false; + std::exception_ptr thread_exception_; +}; + +// Runs the provided Apache handler while guaranteeing we are executing on the +// Ruby dispatcher thread (and therefore with the GVL acquired). Callers may be +// running on arbitrary Apache worker threads. 
+int call_request_with_gvl(int (*func)(request_rec*), request_rec* r) +{ + if (ruby_thread_has_gvl_p()) + { + return func(r); + } + + auto job = [func, r]() -> int { + return func(r); + }; + + return RubyDispatcher::instance().execute(job); +} +} // namespace + // Generic Ruby object to hold the the handler instance. A handler is a Ruby // class which works as the persistent Ruby server environment that handles all // requests. All Apache requests are sent in to it, and it can set up the @@ -78,173 +282,221 @@ int ruby_init_module(apr_pool_t* p, server_rec* server) ap_log_perror( APLOG_MARK, APLOG_NOTICE, 0, p, "mod_ruby[%i]: startup_module", getpid() ); - int x = getpid(); - - // Initialize the Ruby VM and load C extensions try { - // Start up VM - ruby::startup("ModRuby Ruby VM"); - - // Set default encoding to UTF-8. - // - // Ruby docs say not to do this within Ruby. So we do it here - // immediately on startup. Could do it this way: - // - // rb_eval_string("Encoding.default_external='UTF-8'"); - // - // But this is more 3733t - VALUE encoding = rb_const_get(rb_cObject, rb_intern("Encoding")); - rb_funcall( encoding, rb_intern("default_external="), - 1, rb_str_new_cstr("UTF-8") ); - - // Apache log constants for log() in ruby_request.cpp - rb_define_global_const("APLOG_EMERG", INT2NUM(APLOG_EMERG)); - rb_define_global_const("APLOG_ALERT", INT2NUM(APLOG_ALERT)); - rb_define_global_const("APLOG_CRIT", INT2NUM(APLOG_CRIT)); - rb_define_global_const("APLOG_ERR", INT2NUM(APLOG_ERR)); - rb_define_global_const("APLOG_WARNING", INT2NUM(APLOG_WARNING)); - rb_define_global_const("APLOG_NOTICE", INT2NUM(APLOG_NOTICE)); - rb_define_global_const("APLOG_INFO", INT2NUM(APLOG_INFO)); - rb_define_global_const("APLOG_DEBUG", INT2NUM(APLOG_DEBUG)); - rb_define_global_const("APLOG_LEVELMASK", INT2NUM(APLOG_LEVELMASK)); - rb_define_global_const("APLOG_NOERRNO", INT2NUM(APLOG_NOERRNO)); - rb_define_global_const("APLOG_TOCLIENT", INT2NUM(APLOG_TOCLIENT)); - 
rb_define_global_const("APLOG_STARTUP", INT2NUM(APLOG_STARTUP)); - - // Apache method constants - rb_define_global_const("M_GET", INT2NUM(M_GET)); - rb_define_global_const("M_PUT", INT2NUM(M_PUT)); - rb_define_global_const("M_POST", INT2NUM(M_POST)); - rb_define_global_const("M_DELETE", INT2NUM(M_DELETE)); - rb_define_global_const("M_CONNECT", INT2NUM(M_CONNECT)); - rb_define_global_const("M_OPTIONS", INT2NUM(M_OPTIONS)); - rb_define_global_const("M_TRACE", INT2NUM(M_TRACE)); - rb_define_global_const("M_PATCH", INT2NUM(M_PATCH)); - rb_define_global_const("M_PROPFIND", INT2NUM(M_PROPFIND)); - rb_define_global_const("M_PROPPATCH", INT2NUM(M_PROPPATCH)); - rb_define_global_const("M_MKCOL", INT2NUM(M_MKCOL)); - rb_define_global_const("M_COPY", INT2NUM(M_COPY)); - rb_define_global_const("M_MOVE", INT2NUM(M_MOVE)); - rb_define_global_const("M_LOCK", INT2NUM(M_LOCK)); - rb_define_global_const("M_UNLOCK", INT2NUM(M_UNLOCK)); - rb_define_global_const("M_VERSION_CONTROL", INT2NUM(M_VERSION_CONTROL)); - rb_define_global_const("M_CHECKOUT", INT2NUM(M_CHECKOUT)); - rb_define_global_const("M_UNCHECKOUT", INT2NUM(M_UNCHECKOUT)); - rb_define_global_const("M_CHECKIN", INT2NUM(M_CHECKIN)); - rb_define_global_const("M_UPDATE", INT2NUM(M_UPDATE)); - rb_define_global_const("M_LABEL", INT2NUM(M_LABEL)); - rb_define_global_const("M_REPORT", INT2NUM(M_REPORT)); - rb_define_global_const("M_MKWORKSPACE", INT2NUM(M_MKWORKSPACE)); - rb_define_global_const("M_MKACTIVITY", INT2NUM(M_MKACTIVITY)); - rb_define_global_const("M_BASELINE_CONTROL", INT2NUM(M_BASELINE_CONTROL)); - rb_define_global_const("M_MERGE", INT2NUM(M_MERGE)); - rb_define_global_const("M_INVALID", INT2NUM(M_INVALID)); - rb_define_global_const("M_METHODS", INT2NUM(64)); - - // Submodules are registered under the Apache namespace - VALUE apache = rb_define_module("Apache"); - - // ModRuby module functions under ModRuby namespace - VALUE ruby = rb_define_module("ModRuby"); - - rb_define_module_function(ruby, "version", 
(fn)ruby_version, 0); - rb_define_module_function(ruby, "release_date", (fn)ruby_release_date, 0); - - // RHTML scanner - - init_rhtml(); - - // Initialize Ruby extensions/modules - - init_request(apache); // Apache request Ruby wrapper - init_server(apache); // Apache server Ruby wrapper - init_connection(apache); // Apache connection Ruby wrapper - init_process(apache); // Apache process Ruby wrapper - - VALUE apr = rb_define_module("APR"); - - init_apr(apr); // Apache Portable Runtime (APR) - init_apr_array(apr); // APR Arrays - init_apr_file_info(apr); // APR file functions - init_apr_pool(apr); // APR pools - init_apr_table(apr); // APR tables - - // Load the Ruby handler module, which is a pure Ruby class - ruby::require("modruby/handler"); - - // Create the global Ruby handler instance. - ruby_handler = new ruby::Object("ModRuby::Handler"); - } - catch (const ruby::Exception& e) - { - fprintf(stderr, "Ruby Exception: %s", e.what()); - - stringstream strm; - strm << "FATAL ERROR: " << e.what(); - log_error(p, strm.str().c_str()); - - return 1; + RubyDispatcher::instance().start(); } catch (const std::exception& e) { - fprintf(stderr, "C++ Exception: %s\n", e.what()); - - stringstream strm; - strm << "FATAL ERROR: " << e.what(); - log_error(p, strm.str().c_str()); - + ap_log_perror( APLOG_MARK, APLOG_CRIT, 0, p, + "mod_ruby[%i]: failed to start Ruby dispatcher: %s", + getpid(), e.what() ); return 1; } - return 0; -} - -int ruby_shutdown_module() -{ - // Call shutdown on all handlers so they can clean up - map::iterator i; - for(i = handlers.begin(); i != handlers.end(); i++) + auto init_job = [p, server]() -> int { - ap_log_error( APLOG_MARK, APLOG_NOTICE, 0, NULL, - "mod_ruby[%i]: ruby_shutdown_module() starting", - getpid() ); - try { - i->second->method("shutdown", 0); + // Set default encoding to UTF-8. + // + // Ruby docs say not to do this within Ruby. So we do it here + // immediately on startup. 
Could do it this way: + // + // rb_eval_string("Encoding.default_external='UTF-8'"); + // + // But this is more 3733t + VALUE encoding = rb_const_get(rb_cObject, rb_intern("Encoding")); + rb_funcall( encoding, rb_intern("default_external="), + 1, rb_str_new_cstr("UTF-8") ); + + // Apache log constants for log() in ruby_request.cpp + rb_define_global_const("APLOG_EMERG", INT2NUM(APLOG_EMERG)); + rb_define_global_const("APLOG_ALERT", INT2NUM(APLOG_ALERT)); + rb_define_global_const("APLOG_CRIT", INT2NUM(APLOG_CRIT)); + rb_define_global_const("APLOG_ERR", INT2NUM(APLOG_ERR)); + rb_define_global_const("APLOG_WARNING", INT2NUM(APLOG_WARNING)); + rb_define_global_const("APLOG_NOTICE", INT2NUM(APLOG_NOTICE)); + rb_define_global_const("APLOG_INFO", INT2NUM(APLOG_INFO)); + rb_define_global_const("APLOG_DEBUG", INT2NUM(APLOG_DEBUG)); + rb_define_global_const("APLOG_LEVELMASK", INT2NUM(APLOG_LEVELMASK)); + rb_define_global_const("APLOG_NOERRNO", INT2NUM(APLOG_NOERRNO)); + rb_define_global_const("APLOG_TOCLIENT", INT2NUM(APLOG_TOCLIENT)); + rb_define_global_const("APLOG_STARTUP", INT2NUM(APLOG_STARTUP)); + + // Apache method constants + rb_define_global_const("M_GET", INT2NUM(M_GET)); + rb_define_global_const("M_PUT", INT2NUM(M_PUT)); + rb_define_global_const("M_POST", INT2NUM(M_POST)); + rb_define_global_const("M_DELETE", INT2NUM(M_DELETE)); + rb_define_global_const("M_CONNECT", INT2NUM(M_CONNECT)); + rb_define_global_const("M_OPTIONS", INT2NUM(M_OPTIONS)); + rb_define_global_const("M_TRACE", INT2NUM(M_TRACE)); + rb_define_global_const("M_PATCH", INT2NUM(M_PATCH)); + rb_define_global_const("M_PROPFIND", INT2NUM(M_PROPFIND)); + rb_define_global_const("M_PROPPATCH", INT2NUM(M_PROPPATCH)); + rb_define_global_const("M_MKCOL", INT2NUM(M_MKCOL)); + rb_define_global_const("M_COPY", INT2NUM(M_COPY)); + rb_define_global_const("M_MOVE", INT2NUM(M_MOVE)); + rb_define_global_const("M_LOCK", INT2NUM(M_LOCK)); + rb_define_global_const("M_UNLOCK", INT2NUM(M_UNLOCK)); + 
rb_define_global_const("M_VERSION_CONTROL", INT2NUM(M_VERSION_CONTROL)); + rb_define_global_const("M_CHECKOUT", INT2NUM(M_CHECKOUT)); + rb_define_global_const("M_UNCHECKOUT", INT2NUM(M_UNCHECKOUT)); + rb_define_global_const("M_CHECKIN", INT2NUM(M_CHECKIN)); + rb_define_global_const("M_UPDATE", INT2NUM(M_UPDATE)); + rb_define_global_const("M_LABEL", INT2NUM(M_LABEL)); + rb_define_global_const("M_REPORT", INT2NUM(M_REPORT)); + rb_define_global_const("M_MKWORKSPACE", INT2NUM(M_MKWORKSPACE)); + rb_define_global_const("M_MKACTIVITY", INT2NUM(M_MKACTIVITY)); + rb_define_global_const("M_BASELINE_CONTROL", INT2NUM(M_BASELINE_CONTROL)); + rb_define_global_const("M_MERGE", INT2NUM(M_MERGE)); + rb_define_global_const("M_INVALID", INT2NUM(M_INVALID)); + rb_define_global_const("M_METHODS", INT2NUM(64)); + + // Submodules are registered under the Apache namespace + VALUE apache = rb_define_module("Apache"); + + // ModRuby module functions under ModRuby namespace + VALUE ruby = rb_define_module("ModRuby"); + + rb_define_module_function(ruby, "version", (fn)ruby_version, 0); + rb_define_module_function(ruby, "release_date", (fn)ruby_release_date, 0); + + // RHTML scanner + + init_rhtml(); + + // Initialize Ruby extensions/modules + + init_request(apache); // Apache request Ruby wrapper + init_server(apache); // Apache server Ruby wrapper + init_connection(apache); // Apache connection Ruby wrapper + init_process(apache); // Apache process Ruby wrapper + + VALUE apr = rb_define_module("APR"); + + init_apr(apr); // Apache Portable Runtime (APR) + init_apr_array(apr); // APR Arrays + init_apr_file_info(apr); // APR file functions + init_apr_pool(apr); // APR pools + init_apr_table(apr); // APR tables + + // Load the Ruby handler module, which is a pure Ruby class + ruby::require("modruby/handler"); + + // Create the global Ruby handler instance. 
+ ruby_handler = new ruby::Object("ModRuby::Handler"); } catch (const ruby::Exception& e) { - // Create the error message + fprintf(stderr, "Ruby Exception: %s", e.what()); + stringstream strm; - strm << "ruby_shutdown_module(): Ruby Exception: " << e.what() << "\n" - << e.stackdump(); + strm << "FATAL ERROR: " << e.what(); + log_error(p, strm.str().c_str()); - // Log error (critical) - ap_log_error( APLOG_MARK, APLOG_CRIT, 0, NULL, - "mod_ruby[%i] : %s", - getpid(), - strm.str().c_str() ); + return 1; } catch (const std::exception& e) { - fprintf(stderr, "ruby_shutdown_module(): C++ Exception\n"); + fprintf(stderr, "C++ Exception: %s\n", e.what()); + + stringstream strm; + strm << "FATAL ERROR: " << e.what(); + log_error(p, strm.str().c_str()); + + return 1; } + + return 0; + }; + + try + { + return RubyDispatcher::instance().execute(init_job); + } + catch (const std::exception& e) + { + ap_log_perror( APLOG_MARK, APLOG_CRIT, 0, p, + "mod_ruby[%i]: initialization failure: %s", + getpid(), e.what() ); + return 1; } +} - if (ruby_handler != NULL) +int ruby_shutdown_module() +{ + if (!RubyDispatcher::instance().is_started()) { - ruby_handler->method("shutdown", 0); + return 0; + } + + auto shutdown_job = []() -> int + { + // Call shutdown on all handlers so they can clean up + map::iterator i; + for(i = handlers.begin(); i != handlers.end(); i++) + { + ap_log_error( APLOG_MARK, APLOG_NOTICE, 0, NULL, + "mod_ruby[%i]: ruby_shutdown_module() starting", + getpid() ); - delete ruby_handler; - ruby_handler = NULL; + try + { + i->second->method("shutdown", 0); + } + catch (const ruby::Exception& e) + { + // Create the error message + stringstream strm; + strm << "ruby_shutdown_module(): Ruby Exception: " << e.what() << "\n" + << e.stackdump(); + + // Log error (critical) + ap_log_error( APLOG_MARK, APLOG_CRIT, 0, NULL, + "mod_ruby[%i] : %s", + getpid(), + strm.str().c_str() ); + } + catch (const std::exception&) + { + fprintf(stderr, "ruby_shutdown_module(): C++ 
Exception\n"); + } + + delete i->second; + } + + if (ruby_handler != NULL) + { + ruby_handler->method("shutdown", 0); + + delete ruby_handler; + ruby_handler = NULL; + } + + handlers.clear(); + + return 0; + }; + + int result = 0; + + try + { + result = RubyDispatcher::instance().execute(shutdown_job); + } + catch (const std::exception& e) + { + ap_log_error( APLOG_MARK, APLOG_CRIT, 0, NULL, + "mod_ruby[%i]: shutdown failure: %s", + getpid(), e.what() ); + result = 1; } - // Shutdown Ruby environment - ruby::shutdown(); + RubyDispatcher::instance().stop(); - return 0; + return result; } int ruby_log_error(request_rec* r, int level, const char* msg) @@ -607,7 +859,9 @@ modruby::Handler ruby_request_get_access_handler(request_rec* r) return modruby::Handler(); } -int ruby_request_handler(request_rec* r) +// Core HTTP handler implementation that performs the request lifecycle using +// Ruby objects. This must execute on the Ruby dispatcher thread. +static int ruby_request_handler_impl(request_rec* r) { apache::Request req(r); @@ -733,6 +987,12 @@ int ruby_request_handler(request_rec* r) } } +// Public Apache handler entry point. It delegates to the Ruby dispatcher so +// the implementation runs with the GVL. +int ruby_request_handler(request_rec* r) +{ + return call_request_with_gvl(ruby_request_handler_impl, r); +} // Common code used for RHTML and Ruby script handlers int ruby_generic_handler( request_rec* r, const char* handler_name, @@ -846,17 +1106,32 @@ int ruby_generic_handler( request_rec* r, } } -int ruby_request_rhtml_handler(request_rec* r) +// Implementation for the RHTML content handler that expects the GVL to be held. +static int ruby_request_rhtml_handler_impl(request_rec* r) { return ruby_generic_handler(r, "ruby-rhtml-handler", "rhtml"); } -int ruby_request_script_handler(request_rec* r) +// Apache hook wrapper for the RHTML handler that ensures execution on the Ruby thread. 
+int ruby_request_rhtml_handler(request_rec* r) +{ + return call_request_with_gvl(ruby_request_rhtml_handler_impl, r); +} + +// Implementation for the Ruby script handler that requires Ruby VM access. +static int ruby_request_script_handler_impl(request_rec* r) { return ruby_generic_handler(r, "ruby-script-handler", "script"); } -int ruby_request_access_handler(request_rec* r) +// Apache hook wrapper for the script handler that marshals work through the dispatcher. +int ruby_request_script_handler(request_rec* r) +{ + return call_request_with_gvl(ruby_request_script_handler_impl, r); +} + +// Implementation for the access handler hook. Assumes it runs on the Ruby thread. +static int ruby_request_access_handler_impl(request_rec* r) { apache::Request req(r); @@ -964,3 +1239,9 @@ int ruby_request_access_handler(request_rec* r) return OK; } } + +// Apache hook wrapper for the access handler that schedules execution with the GVL. +int ruby_request_access_handler(request_rec* r) +{ + return call_request_with_gvl(ruby_request_access_handler_impl, r); +} diff --git a/src/ruby.cpp b/src/ruby.cpp index 70726f4..c38457f 100644 --- a/src/ruby.cpp +++ b/src/ruby.cpp @@ -1,11 +1,90 @@ #include #include +#include #include #include "ruby.hpp" using std::string; +extern "C" int ruby_thread_has_gvl_p(void); + +namespace +{ +// Holds the parameters/result for invoking rb_protect under the dispatcher. +struct ProtectCallContext +{ + VALUE (*func)(VALUE); + VALUE arg; + int error; + VALUE result; +}; + +// Trampoline that re-enters rb_protect while the GVL is held. +static void* protect_with_gvl(void* data) +{ + ProtectCallContext* ctx = static_cast(data); + ctx->error = 0; + ctx->result = rb_protect(ctx->func, ctx->arg, &ctx->error); + + return NULL; +} + +// Calls the supplied Ruby C function inside rb_protect, reacquiring the GVL +// if the current native thread does not already own it. 
+static VALUE call_with_protect(VALUE (*func)(VALUE), VALUE arg, int* error)
+{
+    ProtectCallContext ctx { func, arg, 0, Qnil };
+
+    if (ruby_thread_has_gvl_p())  // this thread already holds the GVL: call rb_protect directly
+    {
+        ctx.error = 0;
+        ctx.result = rb_protect(func, arg, &ctx.error);
+    }
+    else
+    {
+        rb_thread_call_with_gvl(protect_with_gvl, &ctx);  // NOTE(review): protect_with_gvl is defined above this excerpt; presumably it fills ctx.result/ctx.error - confirm
+    }
+
+    if (error != NULL)  // error is an optional out-parameter
+    {
+        *error = ctx.error;
+    }
+
+    return ctx.result;
+}
+
+// Carries rb_load_protect's arguments and resulting error state through a void* callback.
+struct LoadCallContext
+{
+    const char* filename;
+    int anonymous;
+    int error;
+};
+
+// Callback that runs rb_load_protect on a LoadCallContext; meant to be invoked with the GVL held.
+static void* load_with_gvl(void* data)
+{
+    LoadCallContext* ctx = static_cast<LoadCallContext*>(data);
+    ctx->error = 0;
+    rb_load_protect(rb_str_new_cstr(ctx->filename), ctx->anonymous, &ctx->error);
+    return NULL;
+}
+
+// Calls func(data) directly when this thread holds the GVL, otherwise via rb_thread_call_with_gvl.
+static void perform_with_gvl(void* (*func)(void*), void* data)
+{
+    if (ruby_thread_has_gvl_p())
+    {
+        func(data);  // the callback's return value is discarded in both branches
+    }
+    else
+    {
+        rb_thread_call_with_gvl(func, data);  // NOTE(review): Ruby documents this as valid only on a Ruby thread that released the GVL - confirm for Apache worker threads
+    }
+}
+} // namespace
+
 namespace ruby
 {
 
@@ -60,9 +139,9 @@ VALUE Object::method(const char* name, int n, ...)
     arg.argv = argv;
 
     int error = 0;
-    VALUE result = rb_protect( ruby::method_wrap,
-                               reinterpret_cast<VALUE>(&arg),
-                               &error );
+    VALUE result = call_with_protect( ruby::method_wrap,
+                                      reinterpret_cast<VALUE>(&arg),
+                                      &error );
 
     if (error)
     {
@@ -435,7 +514,7 @@ VALUE method(VALUE recv, ID id, int n, ...)
     arg.argv = argv;
 
     int error = 0;
-    VALUE result = rb_protect(method_wrap, reinterpret_cast<VALUE>(&arg), &error);
+    VALUE result = call_with_protect(method_wrap, reinterpret_cast<VALUE>(&arg), &error);
 
     if (error)
     {
@@ -469,7 +548,7 @@ VALUE vm_method(VALUE recv, ID id, int n, va_list ar)
     arg.argv = argv;
 
     int error = 0;
-    VALUE result = rb_protect(method_wrap, reinterpret_cast<VALUE>(&arg), &error);
+    VALUE result = call_with_protect(method_wrap, reinterpret_cast<VALUE>(&arg), &error);
 
     if (error)
     {
@@ -546,7 +625,7 @@ bool call_function(const char* method, int n, ...)
 void require(const char* filename)
 {
     int error = 0;
-    rb_protect(require_protect, reinterpret_cast<VALUE>(filename), &error);
+    call_with_protect(require_protect, reinterpret_cast<VALUE>(filename), &error);
 
     if (error)
     {
@@ -562,10 +641,10 @@ void require(const char* filename)
 
 void load(const char* filename, int anonymous)
 {
-    int error = 0;
-    rb_load_protect(rb_str_new2(filename), anonymous, &error);
+    LoadCallContext ctx { filename, anonymous, 0 };
+    perform_with_gvl(load_with_gvl, &ctx);
 
-    if (error)
+    if (ctx.error)
     {
         Exception e;
         e.backtrace();
@@ -618,7 +697,7 @@ VALUE create_object(const char* class_name, int n, va_list ar)
     arg.argv = argv;
 
     int error = 0;
-    VALUE self = rb_protect(create_object_protect, reinterpret_cast<VALUE>(&arg), &error);
+    VALUE self = call_with_protect(create_object_protect, reinterpret_cast<VALUE>(&arg), &error);
 
     if (error)
     {

From 2cba98fd5cb0820ef03a4e16ee049a41a34316a7 Mon Sep 17 00:00:00 2001
From: Cal Heldenbrand
Date: Thu, 23 Oct 2025 10:06:02 -0500
Subject: [PATCH 5/6] Switch back to Ruby 3.2.3

---
 Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 6fa22d8..8797737 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -55,9 +55,9 @@ RUN rvm requirements
 
 # Pick your ruby version here
 # Segfaults in rb_protect()
-#RUN rvm install ruby-3.2.3
+RUN rvm install ruby-3.2.3
 # Works
-RUN rvm install ruby-2.7.5
+#RUN rvm install ruby-2.7.5
 # Same segfault as 3.2.3
 #RUN rvm install ruby-3.0.3
 

From 5c97b88bc461897c5dfa74b7d4603eb79c89be3f Mon Sep 17 00:00:00 2001
From: Cal Heldenbrand
Date: Fri, 24 Oct 2025 12:59:27 -0500
Subject: [PATCH 6/6] Adding extra readme for Codex

---
 README.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 57 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a48868b..9c0dd7f 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ The build will create these files in the local repo:
 
 To build from source, you need to the following packages:
 
-  * 
Ruby 1.9 or 2.x. (Ruby header files also needed).
+  * Ruby 2.x or 3.x. (Ruby header files also needed).
   * Apache, APR and APR Util headers
   * CMake
 
@@ -69,7 +69,7 @@ using the Docker image as a handy developer's environment:
 
     bash $ ./script/docker_run
     ...
-    0x00007f30bb13733f in accept4 () from /lib64/libc.so.6
+    0x00007f7a7adfc7b4 in read () from /lib64/libpthread.so.0
 
 A single Apache worker child is running in a gdb shell. Smoke test mod_ruby in a
 separate terminal window:
@@ -77,7 +77,7 @@ separate terminal window:
     bash $ curl localhost:8080
     Hi there from ruby
     Hello World from HTML!
-    
+
 If mod_ruby crashes, gdb will print a full stack trace. You may also do
 `Ctrl + C` to break out to a gdb prompt to inspect the running Apache child.
 
@@ -87,6 +87,60 @@ cases.
 
 Submit pull requests using feature branches, and have fun!
 
+### Developing with OpenAI Codex
+
+Working with the Ruby C API can be frustrating. Documentation is sparse
+and a lot of the experience is trial and error.
+
+This project integrates OpenAI's Codex for an agentic approach. There
+is a separate `Dockerfile.codex` which loads in the full Ruby C headers
+and source code and installs the Codex CLI. The project root contains
+an AGENTS.md system prompt to tell the LLM how to compile, install
+and test ModRuby. It can run web searches, install packages as root,
+search the Ruby source and behave like an entitled twerp.
+
+Apache is started up with the container and will run in a reload loop
+if it crashes. The agent is given instructions to test changes and
+inspect the stack trace if Apache segfaults. It should keep iterating
+until the goal is met, which could take a very long time.
+
+**Use with caution. Don't leave it running unattended.**
+
+First, install [Codex CLI](https://developers.openai.com/codex/cli/)
+on your workstation and get the credentials set up by logging into ChatGPT.
+The `ModRuby` container bind-mounts your workstation's `$HOME/.codex`
+directory into the container and runs the agent there. The `ModRuby`
+project root is bind-mounted into `/usr/src/mod_ruby` so changes made by the
+agent will be reflected immediately on your host OS.
+
+Start up the container with:
+
+    bash $ ./script/docker_codex
+    . . .
+    To get started, describe a task or try one of these commands:
+    >
+
+Go wild and have fun. For example:
+
+```
+> Search for a potential security vulnerability in ModRuby. Identify the
+  attack vector and write a test case to confirm the vulnerability. Write
+  a patch to fix the vulnerability and supply code comments.
+```
+
+```
+Updated Plan
+  High-level roadmap before diving into the source and modifications.
+  Survey key ModRuby modules for input handling to spot potential security issues.
+  Create a regression test that demonstrates the vulnerability.
+  Implement a fix with comments, rebuild, and rerun the new test.
+
+I'm focusing on confirming an overflow issue in url_encode by running a
+death test with MALLOC_CHECK set to catch heap errors. I'm thinking about
+creating a dedicated test compiled with AddressSanitizer to catch the integer
+overflow vulnerability in url_encode by triggering a heap-buffer-overflow if present.
+```
+
 ## License
 
 Redistribution and use in source and binary forms, with or without modification,