From f80c0f342e3b181db808da16365ea149e2008f3d Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Mon, 9 Mar 2026 22:11:42 +0100 Subject: [PATCH 01/17] Updates --- external/rsh | 2 +- rcp/R/compile.R | 2 +- rcp/inst/perf/.gitignore | 1 + rcp/inst/perf/analyze-perf.R | 503 +++++++++++++++++++++++++++++++++++ rcp/inst/perf/harness.R | 61 +++++ rcp/inst/perf/run-perf.sh | 271 +++++++++++++++++++ 6 files changed, 838 insertions(+), 2 deletions(-) create mode 100644 rcp/inst/perf/.gitignore create mode 100755 rcp/inst/perf/analyze-perf.R create mode 100644 rcp/inst/perf/harness.R create mode 100755 rcp/inst/perf/run-perf.sh diff --git a/external/rsh b/external/rsh index 71633e0..a9264b0 160000 --- a/external/rsh +++ b/external/rsh @@ -1 +1 @@ -Subproject commit 71633e0fb17f48b7b94f3974535411090f88f3c1 +Subproject commit a9264b0c525f5763a1029b4b7afd00c4aab62702 diff --git a/rcp/R/compile.R b/rcp/R/compile.R index 4bec834..0e1228d 100644 --- a/rcp/R/compile.R +++ b/rcp/R/compile.R @@ -46,7 +46,7 @@ rcp_jit_disable <- function() { #' @return A list with counts of successfully compiled and failed functions #' @export rcp_cmppkg <- function(package) { - .Call(C_rcp_cmppkg, package) + invisible(.Call(C_rcp_cmppkg, package)) } #' Get profiling data from RCP diff --git a/rcp/inst/perf/.gitignore b/rcp/inst/perf/.gitignore new file mode 100644 index 0000000..fbca225 --- /dev/null +++ b/rcp/inst/perf/.gitignore @@ -0,0 +1 @@ +results/ diff --git a/rcp/inst/perf/analyze-perf.R b/rcp/inst/perf/analyze-perf.R new file mode 100755 index 0000000..ca5ca62 --- /dev/null +++ b/rcp/inst/perf/analyze-perf.R @@ -0,0 +1,503 @@ +#!/usr/bin/env Rscript +#' Analyze perf.jit.data: categorize time by JIT R code, GC, alloc, C code, etc. 
+#'
+#' Usage: Rscript analyze-perf.R <perf.jit.data> [--benchmark-only] [--top N]
+
+suppressPackageStartupMessages({
+ library(tidyverse)
+ library(stringr)
+})
+
+# ---------------------------------------------------------------------------
+# Category definitions (data-driven, order = priority)
+# ---------------------------------------------------------------------------
+
+CATEGORY_RULES <- tribble(
+ ~category, ~patterns,
+ "gc", list(c("R_gc_internal", "do_gc", "AgeNodeAndChildren",
+ "TryToReleasePages*", "mark_phase", "RunGenCollect")),
+ "alloc", list(c("Rf_allocVector*", "CONS_NR", "GetNewPage", "Rf_mkPROMISE",
+ "_int_malloc", "_int_free*", "malloc", "free", "calloc", "realloc")),
+ "env-lookup", list(c("Rf_findVarInFrame*", "findVarLocInFrame*", "R_findVar*",
+ "Rf_findFun*", "GET_BINDING_CELL", "R_envHasNoSpecialSymbols")),
+ "arg-matching", list(c("Rf_matchArgs*", "Rf_checkArityCall")),
+ "call-trampoline", list(c("Rsh_Call", "Rsh_closure_call_args", "Rsh_builtin_call_args",
+ "rcpEval", "rcpCallNative*")),
+ "bc-interpreter", list(c("bcEval*")),
+ "r-vm", list(c("Rf_eval", "forcePromise", "R_execClosure", "applyClosure*",
+ "R_cmpfun*", "Rf_ReplIteration")),
+ "context-mgmt", list(c("Rf_begincontext", "Rf_endcontext", "Rf_NewEnvironment",
+ "make_applyClosure_env", "R_BCRelPC", "R_BCVersionOK")),
+)
+
+# Flatten patterns into regex for each category (glob -> regex)
+glob_to_regex <- function(pat) {
+ # Use glob2rx from utils, which handles fnmatch-style * and ? 
correctly + # glob2rx("foo*") -> "^foo" (with trimhead/trimtail defaults) + # We want full anchored regex like fnmatch + utils::glob2rx(pat) +} + +CATEGORY_RULES <- CATEGORY_RULES |> + mutate( + patterns = map(patterns, ~ .x[[1]]), + regex = map_chr(patterns, ~ paste0(map_chr(.x, glob_to_regex), collapse = "|")) + ) + +ALL_CATEGORIES <- c( + "jit-r-code", "jit-unresolved", "gc", "alloc", "env-lookup", + "arg-matching", "call-trampoline", "bc-interpreter", "r-vm", + "context-mgmt", "rcp-runtime", "c-builtins", "system", "unmatched" +) + +INFRA_PACKAGES <- c( + "base", "utils", "stats", "methods", "grDevices", "graphics", + "datasets", "tools", "compiler", "grid", "parallel", "splines", + "stats4", "tcltk" +) + +# --------------------------------------------------------------------------- +# DSO helpers (vectorized) +# --------------------------------------------------------------------------- + +is_r_binary <- function(dso) grepl("(/R$|/R\\.bin$)", dso) +is_jitted <- function(dso) grepl("jitted-", dso, fixed = TRUE) & grepl("\\.so$", dso) +is_rcp_so <- function(dso) grepl("rcp\\.so$", dso) + +is_system_dso <- function(dso) { + dso == "[unknown]" | + grepl("libc\\.so|libm\\.so|libpthread|ld-linux|libdeflate|\\[kernel|libgcc|libstdc\\+\\+|\\[vdso\\]|libdl", dso) +} + +# --------------------------------------------------------------------------- +# Vectorized parsing of perf script output +# --------------------------------------------------------------------------- + +parse_perf_script <- function(perf_data_file) { + # Check if a cached perf script text file exists + txt_file <- sub("\\.[^.]+$", ".txt", perf_data_file) + if (file.exists(txt_file)) { + message("Reading cached perf script output from ", txt_file, " ...") + lines <- readLines(txt_file) + } else { + message("Running perf script on ", perf_data_file, " ...") + lines <- system2("perf", c("script", "-i", perf_data_file), + stdout = TRUE, stderr = FALSE) + # Cache for reproducibility + message("Caching perf 
script output to ", txt_file, " ...") + writeLines(lines, txt_file) + } + message("Read ", length(lines), " lines, parsing ...") + + # Identify header lines vs frame lines + # Header lines: non-empty, don't start with space/tab + n <- length(lines) + empty <- nchar(lines) == 0L + first_char <- substr(lines, 1, 1) + is_header <- !empty & first_char != " " & first_char != "\t" + + # Parse frame lines + is_frame <- !empty & !is_header + frame_idx <- which(is_frame) + frame_matches <- str_match(lines[frame_idx], "^\\s+(\\S+)\\s+(.+)\\s+\\((.+)\\)\\s*$") + + valid <- !is.na(frame_matches[, 1]) + frame_idx <- frame_idx[valid] + frame_addr <- frame_matches[valid, 2] + frame_sym <- sub("\\+0x[0-9a-fA-F]+$", "", str_trim(frame_matches[valid, 3])) + frame_dso <- frame_matches[valid, 4] + + # Assign each line to a sample_id: cumsum of header lines + header_cumsum <- cumsum(is_header) + frame_sample_id <- header_cumsum[frame_idx] + + # Build frames tibble + frames_tbl <- tibble( + sample_id = frame_sample_id, + sym = frame_sym, + dso = frame_dso, + addr = frame_addr + ) + + # Frame position within each sample (1 = leaf) + frames_tbl <- frames_tbl |> + group_by(sample_id) |> + mutate(frame_pos = row_number()) |> + ungroup() + + # Extract headers and timestamps + header_idx <- which(is_header) + headers <- lines[header_idx] + ts_match <- str_match(headers, "\\s(\\d+\\.\\d+):") + timestamps <- as.numeric(ts_match[, 2]) + + samples <- tibble( + sample_id = seq_along(headers), + header = headers, + timestamp = timestamps + ) + + # Only keep samples that have frames + samples_with_frames <- unique(frames_tbl$sample_id) + samples <- samples |> filter(sample_id %in% samples_with_frames) + + list(samples = samples, frames = frames_tbl) +} + +# --------------------------------------------------------------------------- +# Classification (vectorized where possible, row-wise where needed) +# --------------------------------------------------------------------------- + +classify_samples <- 
function(samples, frames) { + # Get leaf frame for each sample + leaf <- frames |> + filter(frame_pos == 1) |> + select(sample_id, leaf_sym = sym, leaf_dso = dso) + + samples <- samples |> left_join(leaf, by = "sample_id") + + # Step 1: DSO-based classification + samples <- samples |> + mutate( + is_jit = is_jitted(leaf_dso), + is_unknown = leaf_dso == "[unknown]", + is_r = is_r_binary(leaf_dso) | leaf_dso == "inlined" | is_rcp_so(leaf_dso), + is_sys = is_system_dso(leaf_dso), + is_rcp = is_rcp_so(leaf_dso) + ) + + # For [unknown] DSO samples, check if JIT caller or Rsh_Call in stack + unknown_ids <- samples$sample_id[samples$is_unknown] + if (length(unknown_ids) > 0) { + caller_frames <- frames |> + filter(sample_id %in% unknown_ids, frame_pos > 1) + + jit_context_unknown <- caller_frames |> + group_by(sample_id) |> + summarise( + has_jit_caller = any(is_jitted(dso)), + has_rsh_caller = any(grepl("Rsh_Call", sym, fixed = TRUE)), + .groups = "drop" + ) + + samples <- samples |> + left_join(jit_context_unknown, by = "sample_id") |> + mutate( + has_jit_caller = replace_na(has_jit_caller, FALSE), + has_rsh_caller = replace_na(has_rsh_caller, FALSE) + ) + } else { + samples <- samples |> + mutate(has_jit_caller = FALSE, has_rsh_caller = FALSE) + } + + # Step 2: Symbol-based matching for R/rcp/inlined DSOs + # Build category column for symbol-based matches + sym_category <- rep(NA_character_, nrow(samples)) + for (i in seq_len(nrow(CATEGORY_RULES))) { + matched <- grepl(CATEGORY_RULES$regex[i], samples$leaf_sym) & samples$is_r & is.na(sym_category) + sym_category[matched] <- CATEGORY_RULES$category[i] + } + + # Also check alloc patterns for system DSOs + alloc_regex <- CATEGORY_RULES$regex[CATEGORY_RULES$category == "alloc"] + sys_alloc <- grepl(alloc_regex, samples$leaf_sym) & samples$is_sys + + # Final category assignment + samples <- samples |> + mutate( + sym_cat = sym_category, + category = case_when( + is_jit ~ "jit-r-code", + is_unknown & (has_jit_caller | 
has_rsh_caller) ~ "jit-unresolved", + is_unknown ~ "system", + is_r & !is.na(sym_cat) ~ sym_cat, + is_r & is_rcp ~ "rcp-runtime", + is_r ~ "c-builtins", + is_sys & sys_alloc ~ "alloc", + is_sys ~ "system", + TRUE ~ "unmatched" + ) + ) + + # Find JIT context: first jitted frame in each sample's stack + jit_frames <- frames |> + filter(is_jitted(dso)) |> + group_by(sample_id) |> + slice_min(frame_pos, n = 1, with_ties = FALSE) |> + ungroup() |> + select(sample_id, jit_context = sym) + + samples <- samples |> + left_join(jit_frames, by = "sample_id") |> + mutate( + package = case_when( + is.na(jit_context) ~ "", + grepl("::", jit_context, fixed = TRUE) ~ sub("::.*", "", jit_context), + TRUE ~ "" + ) + ) + + # Clean up temp columns + samples |> + select(sample_id, timestamp, leaf_sym, leaf_dso, category, jit_context, package) +} + +# --------------------------------------------------------------------------- +# Benchmark-only detection +# --------------------------------------------------------------------------- + +detect_benchmark_range <- function(samples, frames) { + jit_frames <- frames |> filter(is_jitted(dso)) + + if (nrow(jit_frames) == 0) return(c(NA_real_, NA_real_)) + + user_jit <- jit_frames |> + mutate( + pkg = if_else(grepl("::", sym, fixed = TRUE), sub("::.*", "", sym), sym) + ) |> + filter(!(pkg %in% INFRA_PACKAGES)) + + if (nrow(user_jit) == 0) return(c(NA_real_, NA_real_)) + + user_sample_ids <- unique(user_jit$sample_id) + ts <- samples$timestamp[samples$sample_id %in% user_sample_ids] + ts <- ts[!is.na(ts)] + + if (length(ts) == 0) return(c(NA_real_, NA_real_)) + c(min(ts), max(ts)) +} + +# --------------------------------------------------------------------------- +# Output formatting +# --------------------------------------------------------------------------- + +print_table <- function(headers, df, col_widths = NULL) { + mat <- as.matrix(df) + if (is.null(col_widths)) { + col_widths <- vapply(seq_along(headers), function(i) { + 
max(nchar(headers[i]), max(nchar(mat[, i]), na.rm = TRUE)) + 2L + }, integer(1)) + } + + header_line <- "" + for (i in seq_along(headers)) { + if (i == 1) { + header_line <- paste0(header_line, str_pad(headers[i], col_widths[i], "right")) + } else { + header_line <- paste0(header_line, str_pad(headers[i], col_widths[i], "left")) + } + } + cat(header_line, "\n") + cat(strrep("\u2500", nchar(header_line)), "\n") + + for (r in seq_len(nrow(mat))) { + line <- "" + for (i in seq_along(headers)) { + val <- mat[r, i] + if (i == 1) { + line <- paste0(line, str_pad(val, col_widths[i], "right")) + } else { + line <- paste0(line, str_pad(val, col_widths[i], "left")) + } + } + cat(line, "\n") + } +} + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +main <- function() { + args <- commandArgs(trailingOnly = TRUE) + + perf_data <- NULL + benchmark_only <- FALSE + top_n <- 30L + + i <- 1L + while (i <= length(args)) { + if (args[i] == "--benchmark-only") { + benchmark_only <- TRUE + } else if (args[i] == "--top") { + i <- i + 1L + top_n <- as.integer(args[i]) + } else if (is.null(perf_data)) { + perf_data <- args[i] + } else { + stop("Unexpected argument: ", args[i]) + } + i <- i + 1L + } + + if (is.null(perf_data)) { + cat("Usage: Rscript analyze-perf.R [--benchmark-only] [--top N]\n", + file = stderr()) + quit(status = 1) + } + + if (!file.exists(perf_data)) { + cat("Error:", perf_data, "not found\n", file = stderr()) + quit(status = 1) + } + + perf_dir <- dirname(perf_data) + + # Parse + parsed <- parse_perf_script(perf_data) + all_samples <- parsed$samples + all_frames <- parsed$frames + message("Parsed ", nrow(all_samples), " samples") + + # Benchmark-only filtering + if (benchmark_only) { + range <- detect_benchmark_range(all_samples, all_frames) + if (is.na(range[1])) { + message("Warning: no user JIT functions found, showing all samples") + } else { + 
message(sprintf("Benchmark range: %.6f - %.6f (%.3fs)", + range[1], range[2], range[2] - range[1])) + keep_ids <- all_samples$sample_id[ + !is.na(all_samples$timestamp) & + all_samples$timestamp >= range[1] & + all_samples$timestamp <= range[2] + ] + all_samples <- all_samples |> filter(sample_id %in% keep_ids) + all_frames <- all_frames |> filter(sample_id %in% keep_ids) + } + } + + total <- nrow(all_samples) + message("Samples after filtering: ", total) + + if (total == 0) { + cat("No samples found.\n") + quit(status = 0) + } + + # Classify + message("Classifying ...") + samples <- classify_samples(all_samples, all_frames) + + # ── CSV output ── + write_csv(samples, file.path(perf_dir, "samples.csv")) + + # ── Table 1: Category breakdown ── + cat("\n") + cat(strrep("=", 60), "\n") + cat(" Category Breakdown\n") + cat(strrep("=", 60), "\n") + + cat_counts <- samples |> + count(category) |> + mutate(pct = sprintf("%.1f%%", 100 * n / total)) + + cat_order <- tibble(category = ALL_CATEGORIES, ord = seq_along(ALL_CATEGORIES)) + cat_display <- cat_counts |> + left_join(cat_order, by = "category") |> + mutate(ord = if_else(is.na(ord), 999L, as.integer(ord))) |> + arrange(ord) |> + filter(n > 0) |> + select(category, n, pct) |> + bind_rows(tibble(category = "TOTAL", n = total, pct = "100.0%")) + + print_table(c("Category", "Samples", "%"), + cat_display |> mutate(n = as.character(n))) + cat("\n") + + write_csv(cat_display |> filter(category != "TOTAL"), + file.path(perf_dir, "category_summary.csv")) + + # ── Table 2: By R Package ── + display_cats <- intersect(ALL_CATEGORIES, unique(samples$category)) + + cat(strrep("=", 60), "\n") + cat(" By R Package\n") + cat(strrep("=", 60), "\n") + + pkg_summary <- samples |> + count(package, category) |> + pivot_wider(names_from = category, values_from = n, values_fill = 0) + + pkg_summary <- pkg_summary |> + mutate(Total = rowSums(across(where(is.numeric)))) |> + arrange(desc(Total)) + + for (cc in display_cats) { + if (!(cc %in% 
names(pkg_summary))) pkg_summary[[cc]] <- 0L + } + + pkg_out <- pkg_summary |> + select(Package = package, all_of(display_cats), Total) |> + mutate(across(where(is.numeric), ~ if_else(.x == 0, "", as.character(.x)))) + + print_table(names(pkg_out), pkg_out) + cat("\n") + + # ── Table 3: Top R Functions ── + cat(strrep("=", 60), "\n") + cat(sprintf(" Top %d R Functions (by total samples)\n", top_n)) + cat(strrep("=", 60), "\n") + + func_summary <- samples |> + mutate(jit_context = if_else(is.na(jit_context), "", jit_context)) |> + count(jit_context, category) |> + pivot_wider(names_from = category, values_from = n, values_fill = 0) + + func_summary <- func_summary |> + mutate(Total = rowSums(across(where(is.numeric)))) |> + arrange(desc(Total)) |> + slice_head(n = top_n) + + for (cc in display_cats) { + if (!(cc %in% names(func_summary))) func_summary[[cc]] <- 0L + } + + func_out <- func_summary |> + mutate(`%` = sprintf("%.1f%%", 100 * Total / total)) |> + select(`R Function` = jit_context, all_of(display_cats), Total, `%`) |> + mutate(across(where(is.numeric), ~ if_else(.x == 0, "", as.character(.x)))) + + print_table(names(func_out), func_out) + cat("\n") + + # ── Table 4: C-builtins detail ── + cbuiltins <- samples |> + filter(category == "c-builtins") |> + count(leaf_sym, sort = TRUE) |> + mutate(pct = sprintf("%.1f%%", 100 * n / sum(n))) + + if (nrow(cbuiltins) > 0) { + cat(strrep("=", 60), "\n") + cat(" C-Builtins Detail (leaf symbols)\n") + cat(strrep("=", 60), "\n") + cbuiltins_out <- cbuiltins |> + slice_head(n = top_n) |> + mutate(n = as.character(n)) + print_table(c("Symbol", "Samples", "%"), cbuiltins_out) + cat("\n") + + write_csv(cbuiltins, file.path(perf_dir, "cbuiltins_detail.csv")) + } + + # ── Table 5: Unmatched detail ── + unmatched <- samples |> + filter(category == "unmatched") |> + count(leaf_dso, leaf_sym, sort = TRUE) + + if (nrow(unmatched) > 0) { + cat(strrep("=", 60), "\n") + cat(" Unmatched Samples (unknown DSO/symbol)\n") + 
cat(strrep("=", 60), "\n") + unmatched_out <- unmatched |> + slice_head(n = top_n) |> + mutate(n = as.character(n)) + print_table(c("DSO", "Symbol", "Samples"), unmatched_out) + cat("\n") + } + + message("CSV files written to ", perf_dir) +} + +main() diff --git a/rcp/inst/perf/harness.R b/rcp/inst/perf/harness.R new file mode 100644 index 0000000..4a1d5cb --- /dev/null +++ b/rcp/inst/perf/harness.R @@ -0,0 +1,61 @@ +# Benchmark harness for run-perf.sh +# Sourced into its own environment to avoid polluting the global namespace. + +harness <- local({ + + # Source a benchmark file, handling setwd so that relative source() calls + # (e.g. source('random.r')) resolve correctly. + load_benchmark <- function(bench_file) { + bench_dir <- dirname(bench_file) + old_wd <- setwd(bench_dir) + on.exit(setwd(old_wd)) + source(basename(bench_file), local = FALSE) + } + + # Run execute(param) `iterations` times and return elapsed times as a vector. + run_benchmark <- function(param, iterations) { + times <- numeric(iterations) + for (i in seq_len(iterations)) { + times[i] <- system.time(execute(param))[["elapsed"]] + } + times + } + + # Get the default parameter for a benchmark's execute() function. + # Returns NA if execute() has no default (e.g. `unused`). + get_default_param <- function() { + f <- formals(execute) + if (length(f) == 0) return(NA) + # A missing default is stored as a symbol; a real default is numeric/etc. + if (typeof(f[[1]]) == "symbol") NA else f[[1]] + } + + # Compile all closures in the global environment using rcp_cmpfun. + # Returns a list with counts of compiled and failed functions. 
+ compile_global_funs <- function() {
+ env <- globalenv()
+ nms <- ls(envir = env)
+ compiled <- 0L
+ failed <- 0L
+ for (nm in nms) {
+ obj <- get(nm, envir = env)
+ if (is.function(obj)) {
+ tryCatch({
+ print(nm)
+ assign(nm, rcp_cmpfun(obj, list(name = nm, optimize = 3)), envir = env)
+ compiled <- compiled + 1L
+ }, error = function(e) {
+ failed <<- failed + 1L
+ })
+ }
+ }
+ list(compiled = compiled, failed = failed)
+ }
+
+ # Write timing results to a CSV file.
+ write_times <- function(times, output_file) {
+ write.csv(data.frame(time = times), output_file, row.names = FALSE)
+ }
+
+ environment()
+}) diff --git a/rcp/inst/perf/run-perf.sh b/rcp/inst/perf/run-perf.sh new file mode 100755 index 0000000..13ea99b --- /dev/null +++ b/rcp/inst/perf/run-perf.sh @@ -0,0 +1,271 @@ +#!/usr/bin/env bash +# +# Run areWeFast benchmarks under different profiling modes. +# +# Usage: run-perf.sh [options] [benchmark ...] +# --iterations N Number of runs per benchmark (default: 15) +# --stack-size N DWARF stack size for perf (default: 16384) +# --frequency N Sampling frequency in Hz for both perf and Rprof (default: 999) +# --output DIR Output directory (default: results/) +# --mode MODE,... Comma-separated modes to run (default: all) +# Available: vanilla, vanilla-rprof, rcp, rcp-perf +# --vanilla-r PATH Path to vanilla R bin/ dir (default: /mnt/data-1/krikava/R-4.3.2/bin) +# --project-r PATH Path to project R bin/ dir (default: auto-detect from common.mk) +# If no benchmarks specified, runs all areWeFast benchmarks. + +set -euo pipefail + +# --------------------------------------------------------------------------- +# Defaults +# --------------------------------------------------------------------------- +ITERATIONS=15 +STACK_SIZE=16384 +FREQUENCY=999 +OUTPUT_DIR="results" +VANILLA_R_DIR="/mnt/data-1/krikava/R-4.3.2/bin" +PROJECT_R_DIR="" +USER_MODES="" +
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+ROOT_DIR="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" +HELPER="$SCRIPT_DIR/harness.R" +BENCH_DIR="$(cd "$ROOT_DIR/../external/rsh/client/rsh/inst/benchmarks/areWeFast" && pwd)" + +ALL_BENCHMARKS=(bounce bounce_nonames bounce_nonames_simple mandelbrot storage towers queens sieve) + +# --------------------------------------------------------------------------- +# Parse arguments +# --------------------------------------------------------------------------- +BENCHMARKS=() + +while [[ $# -gt 0 ]]; do + case "$1" in + --iterations) + ITERATIONS="$2" + shift 2 + ;; + --stack-size) + STACK_SIZE="$2" + shift 2 + ;; + --frequency) + FREQUENCY="$2" + shift 2 + ;; + --output) + OUTPUT_DIR="$2" + shift 2 + ;; + --mode) + USER_MODES="$2" + shift 2 + ;; + --vanilla-r) + VANILLA_R_DIR="$2" + shift 2 + ;; + --project-r) + PROJECT_R_DIR="$2" + shift 2 + ;; + --help | -h) + sed -n '3,12p' "$0" | sed 's/^# *//' + exit 0 + ;; + *) + BENCHMARKS+=("$1") + shift + ;; + esac +done + +if [[ ${#BENCHMARKS[@]} -eq 0 ]]; then + BENCHMARKS=("${ALL_BENCHMARKS[@]}") +fi + +# --------------------------------------------------------------------------- +# Resolve modes +# --------------------------------------------------------------------------- +ALL_MODES=(vanilla vanilla-rprof rcp rcp-perf) + +if [[ -n "$USER_MODES" ]]; then + IFS=',' read -ra MODES <<<"$USER_MODES" + for m in "${MODES[@]}"; do + valid=0 + for a in "${ALL_MODES[@]}"; do [[ "$m" == "$a" ]] && valid=1; done + if [[ $valid -eq 0 ]]; then + echo "Error: unknown mode '$m'. Available: ${ALL_MODES[*]}" >&2 + exit 1 + fi + done +else + MODES=("${ALL_MODES[@]}") +fi + +# --------------------------------------------------------------------------- +# Resolve R binaries +# --------------------------------------------------------------------------- +needs_vanilla=0 +needs_project=0 +for m in "${MODES[@]}"; do + case "$m" in vanilla | vanilla-rprof) needs_vanilla=1 ;; rcp | rcp-perf) needs_project=1 ;; esac +done + +VANILLA_R="$VANILLA_R_DIR/R" +if [[ $needs_vanilla -eq 1 ]] && [[ ! 
-x "$VANILLA_R" ]]; then + echo "Error: vanilla R not found at $VANILLA_R" >&2 + exit 1 +fi + +if [[ -z "$PROJECT_R_DIR" ]]; then + R_HOME="$(cd "$ROOT_DIR/../external/rsh/external/R" && pwd)" + PROJECT_R_DIR="$R_HOME/bin" +fi +PROJECT_R="$PROJECT_R_DIR/R" +if [[ $needs_project -eq 1 ]] && [[ ! -x "$PROJECT_R" ]]; then + echo "Error: project R not found at $PROJECT_R" >&2 + exit 1 +fi + +echo "Configuration:" +echo " Vanilla R: $VANILLA_R" +echo " Project R: $PROJECT_R" +echo " Iterations: $ITERATIONS" +echo " Stack size: $STACK_SIZE" +echo " Frequency: ${FREQUENCY} Hz" +echo " Output: $OUTPUT_DIR" +echo " Modes: ${MODES[*]}" +echo " Benchmarks: ${BENCHMARKS[*]}" +echo "" + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +# Collect all results for summary table: arrays of "bench mode median" +declare -a SUMMARY_ROWS=() + +median_from_csv() { + local csv="$1" + "$VANILLA_R" --slave -e " + d <- read.csv('$csv') + cat(median(d\$time)) + " +} + +run_mode() { + local bench="$1" mode="$2" + local bench_file="$BENCH_DIR/${bench}.R" + local out="$OUTPUT_DIR/$bench" + + echo " [$mode]" + + case "$mode" in + # ------------------------------------------------------------------- + vanilla) + "$VANILLA_R" --slave --no-restore -e " + source('$HELPER') + harness\$load_benchmark('$bench_file') + param <- harness\$get_default_param() + times <- harness\$run_benchmark(param, ${ITERATIONS}) + harness\$write_times(times, '$out/vanilla.csv') + " + ;; + + # ------------------------------------------------------------------- + vanilla-rprof) + mkdir -p "$out/vanilla-rprof" + "$VANILLA_R" --slave --no-restore -e " + source('$HELPER') + harness\$load_benchmark('$bench_file') + param <- harness\$get_default_param() + Rprof('$out/vanilla-rprof/Rprof.out', interval=1/${FREQUENCY}, memory.profiling=T, gc.profiling=T) + times <- harness\$run_benchmark(param, ${ITERATIONS}) 
+ Rprof(NULL) + harness\$write_times(times, '$out/vanilla-rprof.csv') + " + ;; + + # ------------------------------------------------------------------- + rcp) + "$PROJECT_R" --slave --no-restore -e " + library(rcp) + rcp_cmppkg('base') + source('$HELPER') + harness\$load_benchmark('$bench_file') + param <- harness\$get_default_param() + print(harness\$compile_global_funs()) + times <- harness\$run_benchmark(param, ${ITERATIONS}) + harness\$write_times(times, '$out/rcp.csv') + " + ;; + + # ------------------------------------------------------------------- + rcp-perf) + mkdir -p "$out/rcp-perf" + + RCP_PERF_JIT=1 \ + perf record -F "$FREQUENCY" -g --call-graph "dwarf,$STACK_SIZE" -k1 -e cpu-cycles:u \ + -o "$out/rcp-perf/perf.data" \ + -- "$PROJECT_R" --slave --no-restore -e " + library(rcp) + rcp_cmppkg('base') + source('$HELPER') + harness\$load_benchmark('$bench_file') + param <- harness\$get_default_param() + print(harness\$compile_global_funs()) + times <- harness\$run_benchmark(param, ${ITERATIONS}) + harness\$write_times(times, '$out/rcp-perf.csv') + " + + echo " Injecting JIT symbols..." + perf inject --jit \ + -i "$out/rcp-perf/perf.data" \ + -o "$out/rcp-perf/perf.jit.data" + ;; + esac +} + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- +for bench in "${BENCHMARKS[@]}"; do + bench_file="$BENCH_DIR/${bench}.R" + if [[ ! 
-f "$bench_file" ]]; then + echo "Warning: benchmark $bench_file not found, skipping" >&2 + continue + fi + + echo "=== $bench ===" + mkdir -p "$OUTPUT_DIR/$bench" + + for mode in "${MODES[@]}"; do + run_mode "$bench" "$mode" + + # Record result for summary + csv="$OUTPUT_DIR/$bench/${mode}.csv" + if [[ "$mode" == "vanilla-rprof" ]]; then + csv="$OUTPUT_DIR/$bench/vanilla-rprof.csv" + fi + if [[ -f "$csv" ]]; then + med=$(median_from_csv "$csv") + SUMMARY_ROWS+=("$bench $mode $med") + fi + done + echo "" +done + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- +echo "========================================" +echo " Summary (median elapsed time, seconds)" +echo "========================================" +printf "%-30s %-16s %s\n" "Benchmark" "Mode" "Median(s)" +printf "%-30s %-16s %s\n" "------------------------------" "----------------" "--------" +for row in "${SUMMARY_ROWS[@]}"; do + read -r bench mode med <<<"$row" + printf "%-30s %-16s %s\n" "$bench" "$mode" "$med" +done +echo "" +echo "Done." 
From cab5882caad1955e5045fdbd65d48dcfb814b459 Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Tue, 10 Mar 2026 12:13:33 +0100 Subject: [PATCH 02/17] Make sure we do not cache perf results --- rcp/inst/perf/analyze-perf.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rcp/inst/perf/analyze-perf.R b/rcp/inst/perf/analyze-perf.R index ca5ca62..3125471 100755 --- a/rcp/inst/perf/analyze-perf.R +++ b/rcp/inst/perf/analyze-perf.R @@ -76,7 +76,7 @@ is_system_dso <- function(dso) { parse_perf_script <- function(perf_data_file) { # Check if a cached perf script text file exists txt_file <- sub("\\.[^.]+$", ".txt", perf_data_file) - if (file.exists(txt_file)) { + if (file.exists(txt_file) && file.mtime(txt_file) >= file.mtime(perf_data_file)) { message("Reading cached perf script output from ", txt_file, " ...") lines <- readLines(txt_file) } else { From 4760df0cf1790c15b3a47312c37fc7b8fbe71b5e Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Tue, 10 Mar 2026 12:14:31 +0100 Subject: [PATCH 03/17] Add onAttach header --- rcp/R/compile.R | 17 +++++++++++++++++ rcp/src/Makevars | 3 +++ rcp/src/compile.c | 18 ++++++++++++++++++ rcp/src/rcp_init.c | 2 ++ 4 files changed, 40 insertions(+) diff --git a/rcp/R/compile.R b/rcp/R/compile.R index 0e1228d..ff5ec1c 100644 --- a/rcp/R/compile.R +++ b/rcp/R/compile.R @@ -1,7 +1,24 @@ +.rcp_banner <- function() { + info <- .Call("C_rcp_build_info") + ver <- utils::packageVersion("rcp") + flags <- c( + if (info$compile_promises) "promises", + if (nzchar(Sys.getenv("RCP_DUMP_DIR"))) paste0("dump:", Sys.getenv("RCP_DUMP_DIR")), + if (nzchar(Sys.getenv("RCP_GDB_JIT"))) "gdb", + if (nzchar(Sys.getenv("RCP_PERF_JIT"))) "perf" + ) + flag_str <- if (length(flags)) paste0(" [", paste(flags, collapse = ", "), "]") else "" + packageStartupMessage(sprintf("rcp %s (%s)%s", ver, info$git_commit, flag_str)) +} + .onLoad <- function(libname, pkgname) { .Call("rcp_init"); } +.onAttach <- function(libname, pkgname) { + .rcp_banner() +} 
+ #' Compile a function #' #' This function compiles another function with optional settings. diff --git a/rcp/src/Makevars b/rcp/src/Makevars index f014f74..0b30d26 100644 --- a/rcp/src/Makevars +++ b/rcp/src/Makevars @@ -1,5 +1,8 @@ include ../common.mk +RCP_GIT_COMMIT := $(shell git -C $(ROOT_DIR) rev-parse --short HEAD 2>/dev/null || echo NA) +PKG_CFLAGS += -DRCP_GIT_COMMIT=\"$(RCP_GIT_COMMIT)\" + OPENMP ?= 0 ifneq ($(OPENMP),0) diff --git a/rcp/src/compile.c b/rcp/src/compile.c index a9810a8..7d75f64 100644 --- a/rcp/src/compile.c +++ b/rcp/src/compile.c @@ -3035,6 +3035,24 @@ SEXP rcp_init(void) return R_NilValue; } +#ifndef RCP_GIT_COMMIT +#define RCP_GIT_COMMIT "NA" +#endif + +SEXP C_rcp_build_info(void) +{ + const char *names[] = {"git_commit", "compile_promises", ""}; + SEXP info = PROTECT(Rf_mkNamed(VECSXP, names)); + SET_VECTOR_ELT(info, 0, mkString(RCP_GIT_COMMIT)); +#ifdef RCP_COMPILE_PROMISES + SET_VECTOR_ELT(info, 1, ScalarLogical(1)); +#else + SET_VECTOR_ELT(info, 1, ScalarLogical(0)); +#endif + UNPROTECT(1); + return info; +} + void rcp_destr(void) { if (rcp_perf_jit_enabled) diff --git a/rcp/src/rcp_init.c b/rcp/src/rcp_init.c index 0cbca6e..89b445d 100644 --- a/rcp/src/rcp_init.c +++ b/rcp/src/rcp_init.c @@ -17,6 +17,7 @@ extern SEXP C_rcp_gdb_jit_support(void); extern SEXP C_rcp_perf_jit_support(void); extern SEXP rcp_init(void); extern void rcp_destr(void); +extern SEXP C_rcp_build_info(void); extern SEXP __rcp_throw_exception(void); extern SEXP __rcp_test_catch(SEXP expr, SEXP env); @@ -34,6 +35,7 @@ static const R_CallMethodDef CallEntries[] = { {"rcp_dwarf_support", (DL_FUNC)&C_rcp_dwarf_support, 0}, {"rcp_gdb_jit_support", (DL_FUNC)&C_rcp_gdb_jit_support, 0}, {"rcp_perf_jit_support", (DL_FUNC)&C_rcp_perf_jit_support, 0}, + {"C_rcp_build_info", (DL_FUNC)&C_rcp_build_info, 0}, {"rcp_init", (DL_FUNC)&rcp_init, 0}, {"__rcp_throw_exception", (DL_FUNC)&__rcp_throw_exception, 0}, {"__rcp_test_catch", (DL_FUNC)&__rcp_test_catch, 2}, From 
d87cbb102731383a746ecd68770b121bf2d0c50c Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Tue, 10 Mar 2026 12:15:14 +0100 Subject: [PATCH 04/17] Better heuristic on when to recompile --- .gitignore | 1 + rcp/Makefile | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d4fb526..6a7dfd6 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ perf.data /rcp/tests/perf/*.old /rcp/tests/perf/*.data /rcp/tests/perf/profile.json.gz +/rcp/.install-stamp diff --git a/rcp/Makefile b/rcp/Makefile index 6e5f707..37a997d 100644 --- a/rcp/Makefile +++ b/rcp/Makefile @@ -20,13 +20,21 @@ check-toolchain: .PHONY: all all: install +STAMP := .install-stamp + .PHONY: install install: check-toolchain - MAKEFLAGS="$(MAKEFLAGS) CC=$(CC) CXX=$(CXX)" $(R) CMD INSTALL . + @if [ ! -f $(STAMP) ] || \ + [ -n "$$(find src R DESCRIPTION NAMESPACE common.mk -newer $(STAMP) 2>/dev/null | head -1)" ]; then \ + MAKEFLAGS="$(MAKEFLAGS) CC=$(CC) CXX=$(CXX)" $(R) CMD INSTALL . 
&& touch $(STAMP); \ + else \ + echo "Nothing changed, skipping install."; \ + fi .PHONY: clean clean: $(MAKE) -C src -f Makevars clean + rm -f $(STAMP) .PHONY: test test: check-toolchain install From 2e79185c7581195a94c1c1c55b8e817a20e9b134 Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Tue, 10 Mar 2026 12:15:32 +0100 Subject: [PATCH 05/17] More tests --- rcp/tests/smoketest/Makefile | 2 +- rcp/tests/smoketest/cmppkg-base.R | 19 ++++++++++++++++++ .../{pkgcmp.R => cmppkg-replacement.R} | 0 rcp/tests/smoketest/jit.R | 20 +++++++++++++++++++ 4 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 rcp/tests/smoketest/cmppkg-base.R rename rcp/tests/smoketest/{pkgcmp.R => cmppkg-replacement.R} (100%) create mode 100644 rcp/tests/smoketest/jit.R diff --git a/rcp/tests/smoketest/Makefile b/rcp/tests/smoketest/Makefile index 3a85f48..d12f95d 100644 --- a/rcp/tests/smoketest/Makefile +++ b/rcp/tests/smoketest/Makefile @@ -1,6 +1,6 @@ include ../../common.mk -TESTS = basic.R pkgcmp.R cmppkg-dot-functions.R dotcall-issue-12.R ggplot2-unwinding.R cmppkg-s3-generics.R +TESTS = $(wildcard *.R) .PHONY: all test clean diff --git a/rcp/tests/smoketest/cmppkg-base.R b/rcp/tests/smoketest/cmppkg-base.R new file mode 100644 index 0000000..b2da6ef --- /dev/null +++ b/rcp/tests/smoketest/cmppkg-base.R @@ -0,0 +1,19 @@ +library(rcp) + +# Test: compile base package and verify functions work +stopifnot(!rcp_is_compiled(base::Reduce)) +stopifnot(!rcp_is_compiled(base::Filter)) +stopifnot(!rcp_is_compiled(base::Map)) + +rcp_cmppkg("base") + +stopifnot(rcp_is_compiled(base::Reduce)) +stopifnot(rcp_is_compiled(base::Filter)) +stopifnot(rcp_is_compiled(base::Map)) + +# Verify compiled base functions still work correctly +stopifnot(Reduce("+", 1:5) == 15) +stopifnot(identical(Filter(is.numeric, list(1, "a", 2)), list(1, 2))) +stopifnot(identical(Map("+", 1:3, 4:6), list(5L, 7L, 9L))) + +cat("OK\n") diff --git a/rcp/tests/smoketest/pkgcmp.R 
b/rcp/tests/smoketest/cmppkg-replacement.R similarity index 100% rename from rcp/tests/smoketest/pkgcmp.R rename to rcp/tests/smoketest/cmppkg-replacement.R diff --git a/rcp/tests/smoketest/jit.R b/rcp/tests/smoketest/jit.R new file mode 100644 index 0000000..a18b6fd --- /dev/null +++ b/rcp/tests/smoketest/jit.R @@ -0,0 +1,20 @@ +library(rcp) + +# Enable the RCP JIT (hooks into R's JIT mechanism) +rcp_jit_enable() + +# Test 1: global function gets JIT-compiled when called +my_add <- function(x, y) x + y +stopifnot(!rcp_is_compiled(my_add)) +for (i in 1:10) my_add(1L, 2L) +stopifnot(rcp_is_compiled(my_add)) +stopifnot(my_add(3, 4) == 7) + +rcp_jit_disable() + +# Test 2: after disabling, new functions should not be JIT-compiled +new_fun <- function(x) x * 3 +for (i in 1:10) new_fun(1L) +stopifnot(!rcp_is_compiled(new_fun)) + +cat("OK\n") From c201e867dc9c8f3a1857bcbd0ca7d9101de87fab Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Tue, 10 Mar 2026 12:15:46 +0100 Subject: [PATCH 06/17] Fix dumping binary --- rcp/src/compile.c | 99 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 5 deletions(-) diff --git a/rcp/src/compile.c b/rcp/src/compile.c index 7d75f64..e7705fa 100644 --- a/rcp/src/compile.c +++ b/rcp/src/compile.c @@ -551,7 +551,7 @@ static void patch(uint8_t *dst, uint8_t *loc, int pos, const Stencil *stencil, int label_pos = imms[hole->val.imm_pos] - 1; while (ctx->executable_lookup[label_pos] == NULL || ctx->bytecode[label_pos] != opcode_to_find) { - if(ctx->executable_lookup[label_pos] != NULL) + if (ctx->executable_lookup[label_pos] != NULL) DEBUG_PRINT("Looking for opcode %d at position %d, found %d\n", opcode_to_find, label_pos, ctx->bytecode[label_pos]); label_pos--; @@ -1075,6 +1075,19 @@ static void peephole_goto(int bytecode[], int bytecode_size, SEXP *constpool) } } +static void dump_compiled_binary(const char *dump_dir, const char *name, const uint8_t *executable, size_t size) +{ + char dump_path[512]; + 
snprintf(dump_path, sizeof(dump_path), "%s/%s.o", dump_dir, name); + FILE *fp = fopen(dump_path, "wb"); + if (fp) + { + fwrite(executable, 1, size, fp); + fclose(fp); + fprintf(stderr, "RCP: wrote binary to %s (%zu bytes)\n", dump_path, size); + } +} + typedef struct PluginStencil { int pos; @@ -1368,12 +1381,12 @@ static rcp_exec_ptrs copy_patch_internal(int bytecode[], int bytecode_size, int stepfor_bc = bytecode[bc_pos + 1 + 2] - 1; DEBUG_PRINT("Looking for STEPFOR_BCOP at position %d\n", stepfor_bc); - while(bytecode[stepfor_bc] != STEPFOR_BCOP) + while (bytecode[stepfor_bc] != STEPFOR_BCOP) { // We need to find the corresponding STEPFOR_BCOP instruction to know where to copy the specialized code to DEBUG_PRINT("Not found, found %s instead. Following this instruction.\n", OPCODES_NAMES[bytecode[stepfor_bc]]); - if(bytecode[stepfor_bc] == GOTO_BCOP) + if (bytecode[stepfor_bc] == GOTO_BCOP) stepfor_bc = bytecode[stepfor_bc + 1] - 1; else stepfor_bc += RCP_BC_ARG_CNT[bytecode[stepfor_bc]] + 1; @@ -1507,11 +1520,20 @@ static rcp_exec_ptrs copy_patch_internal(int bytecode[], int bytecode_size, res.eh_frame_data = eh_frame_data; if (rcp_gdb_jit_enabled) + { res.jit_entry = gdb_jit_register(name, executable, insts_size, inst_addrs_packed, count_opcodes, instruction_stencils); + } else + { res.jit_entry = NULL; + const char *dump_dir = getenv("RCP_DUMP_DIR"); + if (dump_dir) + { + dump_compiled_binary(dump_dir, name, executable, insts_size); + } + } if (rcp_perf_jit_enabled) { @@ -2088,6 +2110,64 @@ SEXP C_rcp_is_compiled(SEXP closure) return Rf_ScalarLogical(TRUE); } +static const char *guess_closure_name(SEXP f) +{ + SEXP env = CLOENV(f); + if (env == R_EmptyEnv || env == R_NilValue) + return NULL; + + SEXP names = PROTECT(R_lsInternal3(env, TRUE, FALSE)); + int n = LENGTH(names); + const char *sym_name = NULL; + + for (int i = 0; i < n; i++) + { + const char *s = CHAR(STRING_ELT(names, i)); + SEXP val = Rf_findVarInFrame(env, Rf_install(s)); + if (val == f) + { + 
sym_name = s; + break; + } + } + UNPROTECT(1); // names + + if (sym_name == NULL) + return NULL; + + const char *prefix = NULL; + int prefix_len = 0; + char env_buf[32]; + + if (R_IsNamespaceEnv(env)) + { + prefix = CHAR(STRING_ELT(R_NamespaceEnvSpec(env), 0)); + prefix_len = strlen(prefix) + 2; // "pkg::" + } + else if (env == R_GlobalEnv) + { + prefix = NULL; + prefix_len = 0; + } + else + { + snprintf(env_buf, sizeof(env_buf), "__%p", (void *)env); + prefix = env_buf; + prefix_len = strlen(env_buf) + 2; // "__0x...::sym" + } + + int sym_len = strlen(sym_name); + int total = (prefix ? prefix_len : 0) + sym_len + 1; + char *result = R_alloc(total, 1); + + if (prefix && env != R_GlobalEnv) + snprintf(result, total, "%s::%s", prefix, sym_name); + else + snprintf(result, total, "%s", sym_name); + + return result; +} + SEXP C_rcp_cmpfun(SEXP f, SEXP options) { DEBUG_PRINT("Starting to JIT a function...\n"); @@ -2192,6 +2272,15 @@ SEXP C_rcp_cmpfun(SEXP f, SEXP options) } } + if (strcmp(name, "") == 0) + { + const char *guessed = guess_closure_name(f); + if (guessed != NULL) + { + name = guessed; + } + } + DEBUG_PRINT("Compiling %s to bytecode...\n", name); SEXP compiled = compile_to_bc(f, options); #ifdef BC_DEFAULT_OPTIMIZE_LEVEL @@ -2705,7 +2794,7 @@ SEXP C_rcp_get_types(void) SET_STRING_ELT(args_names, k, PRINTNAME(trace->argument_names[k])); else { - char name_buf[16]; + char name_buf[32]; snprintf(name_buf, sizeof(name_buf), "arg%zu", k + 1); SET_STRING_ELT(args_names, k, Rf_mkChar(name_buf)); } @@ -2856,7 +2945,7 @@ SEXP C_rcp_get_types_df(SEXP func_name_sexp) } else { - char name_buf[16]; + char name_buf[32]; snprintf(name_buf, sizeof(name_buf), "arg%zu", c + 1); SET_STRING_ELT(col_names, c, Rf_mkChar(name_buf)); } From 15f4f9d0d6dc07dc9d720f9f73c5082efba9f76d Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Tue, 10 Mar 2026 17:38:10 +0100 Subject: [PATCH 07/17] Sync rsh --- external/rsh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/external/rsh b/external/rsh index a9264b0..6276fe0 160000 --- a/external/rsh +++ b/external/rsh @@ -1 +1 @@ -Subproject commit a9264b0c525f5763a1029b4b7afd00c4aab62702 +Subproject commit 6276fe06771221a90abd3eb0f7a7ff9c1e857678 From 58226fa7536d95ee2cee40219b325aafa59ee286 Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Wed, 11 Mar 2026 18:22:54 +0100 Subject: [PATCH 08/17] Fix the ELF dump --- rcp/src/gdb_jit.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/rcp/src/gdb_jit.c b/rcp/src/gdb_jit.c index c703065..2399c52 100644 --- a/rcp/src/gdb_jit.c +++ b/rcp/src/gdb_jit.c @@ -836,10 +836,37 @@ struct jit_code_entry *gdb_jit_register(const char *func_name, void *code_addr, FILE *fp = fopen(dump_path, "wb"); if (fp) { - fwrite(elf, 1, elf_size, fp); + // Create a copy with embedded code bytes so the ELF is + // self-contained for standalone inspection (e.g. gdb f.o). + // The in-memory ELF uses SHT_NOBITS for .text since GDB's + // JIT interface reads code from process memory directly. 
+ size_t text_offset = (elf_size + 15) & ~15; // align to 16 + size_t dump_size = text_offset + code_size; + uint8_t *dump_elf = malloc(dump_size); + if (dump_elf) + { + memcpy(dump_elf, elf, elf_size); + memcpy(dump_elf + text_offset, code_addr, code_size); + + // Patch .text: SHT_NOBITS -> SHT_PROGBITS + Elf64_Ehdr *dehdr = (Elf64_Ehdr *)dump_elf; + Elf64_Shdr *dshdrs = + (Elf64_Shdr *)(dump_elf + dehdr->e_shoff); + dshdrs[SEC_TEXT].sh_type = SHT_PROGBITS; + dshdrs[SEC_TEXT].sh_offset = text_offset; + + // Patch program header so the segment maps file data + Elf64_Phdr *dphdr = + (Elf64_Phdr *)(dump_elf + dehdr->e_phoff); + dphdr->p_offset = text_offset; + dphdr->p_filesz = code_size; + + fwrite(dump_elf, 1, dump_size, fp); + free(dump_elf); + } fclose(fp); fprintf(stderr, "DEBUG: wrote JIT ELF to %s (%zu bytes)\n", - dump_path, elf_size); + dump_path, dump_size); } } From 3e8a70d8a1dde2bb71a4a7c059727184468c404c Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Wed, 11 Mar 2026 18:23:48 +0100 Subject: [PATCH 09/17] Fix the dotcall test --- rcp/tests/smoketest/dotcall-issue-12.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rcp/tests/smoketest/dotcall-issue-12.R b/rcp/tests/smoketest/dotcall-issue-12.R index b69c74c..75895f5 100644 --- a/rcp/tests/smoketest/dotcall-issue-12.R +++ b/rcp/tests/smoketest/dotcall-issue-12.R @@ -18,6 +18,13 @@ test_dotcall_str <- rcp::rcp_cmpfun( stopifnot(isTRUE(test_dotcall_str(test_dotcall))) stopifnot(isFALSE(test_dotcall_str(function() 1))) +# Test .Call with string and no PACKAGE (fallback path via do_dotcall) +test_dotcall_no_pkg <- rcp::rcp_cmpfun( + function(x) .Call("rcp_is_compiled", x), + list(name="test_dotcall_no_pkg")) +stopifnot(isTRUE(test_dotcall_no_pkg(test_dotcall))) +stopifnot(isFALSE(test_dotcall_no_pkg(function() 1))) + # Test DOTCALL with 0 args test_dotcall_0 <- rcp::rcp_cmpfun( function() .Call(rcp:::C_rcp_dwarf_support), From 9b10bf44f3764a1a93c82f7af60b224b693386c3 Mon Sep 17 00:00:00 2001 From: 
Filip Krikava Date: Wed, 11 Mar 2026 18:24:34 +0100 Subject: [PATCH 10/17] Add perf run --- rcp/inst/perf/run-perf.sh | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/rcp/inst/perf/run-perf.sh b/rcp/inst/perf/run-perf.sh index 13ea99b..aab2269 100755 --- a/rcp/inst/perf/run-perf.sh +++ b/rcp/inst/perf/run-perf.sh @@ -19,9 +19,9 @@ set -euo pipefail # Defaults # --------------------------------------------------------------------------- ITERATIONS=15 -STACK_SIZE=16384 -FREQUENCY=999 -OUTPUT_DIR="results" +STACK_SIZE=32768 +FREQUENCY=99 +OUTPUT_DIR="results-$FREQUENCY-$STACK_SIZE" VANILLA_R_DIR="/mnt/data-1/krikava/R-4.3.2/bin" PROJECT_R_DIR="" USER_MODES="" @@ -222,6 +222,36 @@ run_mode() { perf inject --jit \ -i "$out/rcp-perf/perf.data" \ -o "$out/rcp-perf/perf.jit.data" + + echo " Exporting perf script to CSV..." + perf script -i "$out/rcp-perf/perf.jit.data" 2>/dev/null | awk ' +BEGIN { OFS=","; print "sample_id,timestamp,frame_pos,sym,dso"; sid=0 } +/^[^ \t]/ { + sid++ + # Extract timestamp: matches digits.digits followed by colon + ts = "" + for (i = 1; i <= NF; i++) { + if ($i ~ /^[0-9]+\.[0-9]+:$/) { ts = $i; sub(/:$/, "", ts); break } + } + fpos = 0 + next +} +/^\s+[0-9a-f]+ / { + fpos++ + # Parse: addr sym+0xoffset (dso) + addr = $1 + # DSO is last field in parens + dso = $NF; gsub(/[()]/, "", dso) + # Symbol is everything between addr and (dso), minus offset + sym = "" + for (i = 2; i < NF; i++) sym = (sym == "" ? 
$i : sym " " $i) + sub(/\+0x[0-9a-fA-F]+$/, "", sym) + # CSV-escape fields that might contain commas or quotes + gsub(/"/, "\"\"", sym) + gsub(/"/, "\"\"", dso) + print sid, ts, fpos, "\"" sym "\"", "\"" dso "\"" +} +' >"$out/rcp-perf/frames.csv" ;; esac } From b6d423a39fea0f8e7eefd6c1fed3f6a1c3d608bc Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Fri, 20 Mar 2026 13:00:33 +0100 Subject: [PATCH 11/17] Clean up and fix tests --- .gitignore | 1 + rcp/code.R | 84 ---- rcp/inst/perf/.gitignore | 1 - rcp/inst/perf/analyze-perf.R | 503 ------------------- rcp/inst/perf/harness.R | 61 --- rcp/inst/perf/run-perf.sh | 301 ----------- rcp/src/stencils/Makefile | 4 +- rcp/tests/Makefile | 2 +- rcp/tests/gdb-jit/gdb-next/expected.log | 3 +- rcp/tests/gdb-jit/gdb-recursion/expected.log | 5 +- rcp/tests/perf/Makefile | 0 11 files changed, 10 insertions(+), 955 deletions(-) delete mode 100644 rcp/code.R delete mode 100644 rcp/inst/perf/.gitignore delete mode 100755 rcp/inst/perf/analyze-perf.R delete mode 100644 rcp/inst/perf/harness.R delete mode 100755 rcp/inst/perf/run-perf.sh delete mode 100644 rcp/tests/perf/Makefile diff --git a/.gitignore b/.gitignore index 6a7dfd6..b65bb82 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ perf.data /rcp/tests/perf/*.data /rcp/tests/perf/profile.json.gz /rcp/.install-stamp +/rcp/.install-stamp.cfg diff --git a/rcp/code.R b/rcp/code.R deleted file mode 100644 index 366d4ab..0000000 --- a/rcp/code.R +++ /dev/null @@ -1,84 +0,0 @@ -options(rcp.cmpfun.entry_exit_hooks = TRUE) -library(rcp) -fib <- function(x) { - if (x == 0) 0 - else if (x == 1) 1 - else fib(x-2) + fib(x-1) -} - -fib = rcp::rcp_cmpfun(fib, list(name="fib")) -fib(10) -print(rcp::rcp_get_types_df("fib")) - -library(rcp) -test <- function(x) { - if (x == 0) x=10 - else x=11 - x -} - -test =rcp::rcp_cmpfun(test); -test(1) - -exec <- function(x) { - 1 -} - - -tmp = rcp::rcp_cmpfun(exec) - - - -exec <- function(x) { -repeat { - next - } -} - -library(rcp) -f <- 
function(x) { - y <- x + 1 - if(y > 0){ - z <- x - 1 - } - else { - z <- x + 1 - } - y <- z / y - z -} -f = rcp::rcp_cmpfun(f, list(name="f")) -f(14) -print(rcp::rcp_get_types_df("f")) - -library(rcp) - -g <- function(x, y) { - cat(x, y, "\n") - x -} -g = rcp::rcp_cmpfun(g, list(name = "g")) -g(34, "hello") -g(1L, "world!") -g("Nope", 456) -print(rcp::rcp_get_types_df("g")) - -library(rcp) -h <- function(a, ...) { - cat(a, ..., "\n") -} -h = rcp::rcp_cmpfun(h, list(name = "h")) -h(1, "hello") -h("world", 4, "three") -h(4L, t=89) -print(rcp::rcp_get_types_df("h")) - -library(rcp) -p <- function(x, y) { - cat(x, y, "\n") - y -} -p <- rcp::rcp_cmpfun(p, list(name = "p")) -p(1, "hello") -p(y=3, x="world") -print(rcp::rcp_get_types_df("p")) \ No newline at end of file diff --git a/rcp/inst/perf/.gitignore b/rcp/inst/perf/.gitignore deleted file mode 100644 index fbca225..0000000 --- a/rcp/inst/perf/.gitignore +++ /dev/null @@ -1 +0,0 @@ -results/ diff --git a/rcp/inst/perf/analyze-perf.R b/rcp/inst/perf/analyze-perf.R deleted file mode 100755 index 3125471..0000000 --- a/rcp/inst/perf/analyze-perf.R +++ /dev/null @@ -1,503 +0,0 @@ -#!/usr/bin/env Rscript -#' Analyze perf.jit.data: categorize time by JIT R code, GC, alloc, C code, etc. 
-#' -#' Usage: Rscript analyze-perf.R [--benchmark-only] [--top N] - -suppressPackageStartupMessages({ - library(tidyverse) - library(stringr) -}) - -# --------------------------------------------------------------------------- -# Category definitions (data-driven, order = priority) -# --------------------------------------------------------------------------- - -CATEGORY_RULES <- tribble( - ~category, ~patterns, - "gc", list(c("R_gc_internal", "do_gc", "AgeNodeAndChildren", - "TryToReleasePages*", "mark_phase", "RunGenCollect")), - "alloc", list(c("Rf_allocVector*", "CONS_NR", "GetNewPage", "Rf_mkPROMISE", - "_int_malloc", "_int_free*", "malloc", "free", "calloc", "realloc")), - "env-lookup", list(c("Rf_findVarInFrame*", "findVarLocInFrame*", "R_findVar*", - "Rf_findFun*", "GET_BINDING_CELL", "R_envHasNoSpecialSymbols")), - "arg-matching", list(c("Rf_matchArgs*", "Rf_checkArityCall")), - "call-trampoline", list(c("Rsh_Call", "Rsh_closure_call_args", "Rsh_builtin_call_args", - "rcpEval", "rcpCallNative*")), - "bc-interpreter", list(c("bcEval*")), - "r-vm", list(c("Rf_eval", "forcePromise", "R_execClosure", "applyClosure*", - "R_cmpfun*", "Rf_ReplIteration")), - "context-mgmt", list(c("Rf_begincontext", "Rf_endcontext", "Rf_NewEnvironment", - "make_applyClosure_env", "R_BCRelPC", "R_BCVersionOK")), -) - -# Flatten patterns into regex for each category (glob -> regex) -glob_to_regex <- function(pat) { - # Use glob2rx from utils, which handles fnmatch-style * and ? 
correctly - # glob2rx("foo*") -> "^foo" (with trimhead/trimtail defaults) - # We want full anchored regex like fnmatch - utils::glob2rx(pat) -} - -CATEGORY_RULES <- CATEGORY_RULES |> - mutate( - patterns = map(patterns, ~ .x[[1]]), - regex = map_chr(patterns, ~ paste0(map_chr(.x, glob_to_regex), collapse = "|")) - ) - -ALL_CATEGORIES <- c( - "jit-r-code", "jit-unresolved", "gc", "alloc", "env-lookup", - "arg-matching", "call-trampoline", "bc-interpreter", "r-vm", - "context-mgmt", "rcp-runtime", "c-builtins", "system", "unmatched" -) - -INFRA_PACKAGES <- c( - "base", "utils", "stats", "methods", "grDevices", "graphics", - "datasets", "tools", "compiler", "grid", "parallel", "splines", - "stats4", "tcltk" -) - -# --------------------------------------------------------------------------- -# DSO helpers (vectorized) -# --------------------------------------------------------------------------- - -is_r_binary <- function(dso) grepl("(/R$|/R\\.bin$)", dso) -is_jitted <- function(dso) grepl("jitted-", dso, fixed = TRUE) & grepl("\\.so$", dso) -is_rcp_so <- function(dso) grepl("rcp\\.so$", dso) - -is_system_dso <- function(dso) { - dso == "[unknown]" | - grepl("libc\\.so|libm\\.so|libpthread|ld-linux|libdeflate|\\[kernel|libgcc|libstdc\\+\\+|\\[vdso\\]|libdl", dso) -} - -# --------------------------------------------------------------------------- -# Vectorized parsing of perf script output -# --------------------------------------------------------------------------- - -parse_perf_script <- function(perf_data_file) { - # Check if a cached perf script text file exists - txt_file <- sub("\\.[^.]+$", ".txt", perf_data_file) - if (file.exists(txt_file) && file.mtime(txt_file) >= file.mtime(perf_data_file)) { - message("Reading cached perf script output from ", txt_file, " ...") - lines <- readLines(txt_file) - } else { - message("Running perf script on ", perf_data_file, " ...") - lines <- system2("perf", c("script", "-i", perf_data_file), - stdout = TRUE, stderr = FALSE) - 
# Cache for reproducibility - message("Caching perf script output to ", txt_file, " ...") - writeLines(lines, txt_file) - } - message("Read ", length(lines), " lines, parsing ...") - - # Identify header lines vs frame lines - # Header lines: non-empty, don't start with space/tab - n <- length(lines) - empty <- nchar(lines) == 0L - first_char <- substr(lines, 1, 1) - is_header <- !empty & first_char != " " & first_char != "\t" - - # Parse frame lines - is_frame <- !empty & !is_header - frame_idx <- which(is_frame) - frame_matches <- str_match(lines[frame_idx], "^\\s+(\\S+)\\s+(.+)\\s+\\((.+)\\)\\s*$") - - valid <- !is.na(frame_matches[, 1]) - frame_idx <- frame_idx[valid] - frame_addr <- frame_matches[valid, 2] - frame_sym <- sub("\\+0x[0-9a-fA-F]+$", "", str_trim(frame_matches[valid, 3])) - frame_dso <- frame_matches[valid, 4] - - # Assign each line to a sample_id: cumsum of header lines - header_cumsum <- cumsum(is_header) - frame_sample_id <- header_cumsum[frame_idx] - - # Build frames tibble - frames_tbl <- tibble( - sample_id = frame_sample_id, - sym = frame_sym, - dso = frame_dso, - addr = frame_addr - ) - - # Frame position within each sample (1 = leaf) - frames_tbl <- frames_tbl |> - group_by(sample_id) |> - mutate(frame_pos = row_number()) |> - ungroup() - - # Extract headers and timestamps - header_idx <- which(is_header) - headers <- lines[header_idx] - ts_match <- str_match(headers, "\\s(\\d+\\.\\d+):") - timestamps <- as.numeric(ts_match[, 2]) - - samples <- tibble( - sample_id = seq_along(headers), - header = headers, - timestamp = timestamps - ) - - # Only keep samples that have frames - samples_with_frames <- unique(frames_tbl$sample_id) - samples <- samples |> filter(sample_id %in% samples_with_frames) - - list(samples = samples, frames = frames_tbl) -} - -# --------------------------------------------------------------------------- -# Classification (vectorized where possible, row-wise where needed) -# 
--------------------------------------------------------------------------- - -classify_samples <- function(samples, frames) { - # Get leaf frame for each sample - leaf <- frames |> - filter(frame_pos == 1) |> - select(sample_id, leaf_sym = sym, leaf_dso = dso) - - samples <- samples |> left_join(leaf, by = "sample_id") - - # Step 1: DSO-based classification - samples <- samples |> - mutate( - is_jit = is_jitted(leaf_dso), - is_unknown = leaf_dso == "[unknown]", - is_r = is_r_binary(leaf_dso) | leaf_dso == "inlined" | is_rcp_so(leaf_dso), - is_sys = is_system_dso(leaf_dso), - is_rcp = is_rcp_so(leaf_dso) - ) - - # For [unknown] DSO samples, check if JIT caller or Rsh_Call in stack - unknown_ids <- samples$sample_id[samples$is_unknown] - if (length(unknown_ids) > 0) { - caller_frames <- frames |> - filter(sample_id %in% unknown_ids, frame_pos > 1) - - jit_context_unknown <- caller_frames |> - group_by(sample_id) |> - summarise( - has_jit_caller = any(is_jitted(dso)), - has_rsh_caller = any(grepl("Rsh_Call", sym, fixed = TRUE)), - .groups = "drop" - ) - - samples <- samples |> - left_join(jit_context_unknown, by = "sample_id") |> - mutate( - has_jit_caller = replace_na(has_jit_caller, FALSE), - has_rsh_caller = replace_na(has_rsh_caller, FALSE) - ) - } else { - samples <- samples |> - mutate(has_jit_caller = FALSE, has_rsh_caller = FALSE) - } - - # Step 2: Symbol-based matching for R/rcp/inlined DSOs - # Build category column for symbol-based matches - sym_category <- rep(NA_character_, nrow(samples)) - for (i in seq_len(nrow(CATEGORY_RULES))) { - matched <- grepl(CATEGORY_RULES$regex[i], samples$leaf_sym) & samples$is_r & is.na(sym_category) - sym_category[matched] <- CATEGORY_RULES$category[i] - } - - # Also check alloc patterns for system DSOs - alloc_regex <- CATEGORY_RULES$regex[CATEGORY_RULES$category == "alloc"] - sys_alloc <- grepl(alloc_regex, samples$leaf_sym) & samples$is_sys - - # Final category assignment - samples <- samples |> - mutate( - sym_cat = 
sym_category, - category = case_when( - is_jit ~ "jit-r-code", - is_unknown & (has_jit_caller | has_rsh_caller) ~ "jit-unresolved", - is_unknown ~ "system", - is_r & !is.na(sym_cat) ~ sym_cat, - is_r & is_rcp ~ "rcp-runtime", - is_r ~ "c-builtins", - is_sys & sys_alloc ~ "alloc", - is_sys ~ "system", - TRUE ~ "unmatched" - ) - ) - - # Find JIT context: first jitted frame in each sample's stack - jit_frames <- frames |> - filter(is_jitted(dso)) |> - group_by(sample_id) |> - slice_min(frame_pos, n = 1, with_ties = FALSE) |> - ungroup() |> - select(sample_id, jit_context = sym) - - samples <- samples |> - left_join(jit_frames, by = "sample_id") |> - mutate( - package = case_when( - is.na(jit_context) ~ "", - grepl("::", jit_context, fixed = TRUE) ~ sub("::.*", "", jit_context), - TRUE ~ "" - ) - ) - - # Clean up temp columns - samples |> - select(sample_id, timestamp, leaf_sym, leaf_dso, category, jit_context, package) -} - -# --------------------------------------------------------------------------- -# Benchmark-only detection -# --------------------------------------------------------------------------- - -detect_benchmark_range <- function(samples, frames) { - jit_frames <- frames |> filter(is_jitted(dso)) - - if (nrow(jit_frames) == 0) return(c(NA_real_, NA_real_)) - - user_jit <- jit_frames |> - mutate( - pkg = if_else(grepl("::", sym, fixed = TRUE), sub("::.*", "", sym), sym) - ) |> - filter(!(pkg %in% INFRA_PACKAGES)) - - if (nrow(user_jit) == 0) return(c(NA_real_, NA_real_)) - - user_sample_ids <- unique(user_jit$sample_id) - ts <- samples$timestamp[samples$sample_id %in% user_sample_ids] - ts <- ts[!is.na(ts)] - - if (length(ts) == 0) return(c(NA_real_, NA_real_)) - c(min(ts), max(ts)) -} - -# --------------------------------------------------------------------------- -# Output formatting -# --------------------------------------------------------------------------- - -print_table <- function(headers, df, col_widths = NULL) { - mat <- as.matrix(df) - if 
(is.null(col_widths)) { - col_widths <- vapply(seq_along(headers), function(i) { - max(nchar(headers[i]), max(nchar(mat[, i]), na.rm = TRUE)) + 2L - }, integer(1)) - } - - header_line <- "" - for (i in seq_along(headers)) { - if (i == 1) { - header_line <- paste0(header_line, str_pad(headers[i], col_widths[i], "right")) - } else { - header_line <- paste0(header_line, str_pad(headers[i], col_widths[i], "left")) - } - } - cat(header_line, "\n") - cat(strrep("\u2500", nchar(header_line)), "\n") - - for (r in seq_len(nrow(mat))) { - line <- "" - for (i in seq_along(headers)) { - val <- mat[r, i] - if (i == 1) { - line <- paste0(line, str_pad(val, col_widths[i], "right")) - } else { - line <- paste0(line, str_pad(val, col_widths[i], "left")) - } - } - cat(line, "\n") - } -} - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- - -main <- function() { - args <- commandArgs(trailingOnly = TRUE) - - perf_data <- NULL - benchmark_only <- FALSE - top_n <- 30L - - i <- 1L - while (i <= length(args)) { - if (args[i] == "--benchmark-only") { - benchmark_only <- TRUE - } else if (args[i] == "--top") { - i <- i + 1L - top_n <- as.integer(args[i]) - } else if (is.null(perf_data)) { - perf_data <- args[i] - } else { - stop("Unexpected argument: ", args[i]) - } - i <- i + 1L - } - - if (is.null(perf_data)) { - cat("Usage: Rscript analyze-perf.R [--benchmark-only] [--top N]\n", - file = stderr()) - quit(status = 1) - } - - if (!file.exists(perf_data)) { - cat("Error:", perf_data, "not found\n", file = stderr()) - quit(status = 1) - } - - perf_dir <- dirname(perf_data) - - # Parse - parsed <- parse_perf_script(perf_data) - all_samples <- parsed$samples - all_frames <- parsed$frames - message("Parsed ", nrow(all_samples), " samples") - - # Benchmark-only filtering - if (benchmark_only) { - range <- detect_benchmark_range(all_samples, all_frames) - if (is.na(range[1])) { - 
message("Warning: no user JIT functions found, showing all samples") - } else { - message(sprintf("Benchmark range: %.6f - %.6f (%.3fs)", - range[1], range[2], range[2] - range[1])) - keep_ids <- all_samples$sample_id[ - !is.na(all_samples$timestamp) & - all_samples$timestamp >= range[1] & - all_samples$timestamp <= range[2] - ] - all_samples <- all_samples |> filter(sample_id %in% keep_ids) - all_frames <- all_frames |> filter(sample_id %in% keep_ids) - } - } - - total <- nrow(all_samples) - message("Samples after filtering: ", total) - - if (total == 0) { - cat("No samples found.\n") - quit(status = 0) - } - - # Classify - message("Classifying ...") - samples <- classify_samples(all_samples, all_frames) - - # ── CSV output ── - write_csv(samples, file.path(perf_dir, "samples.csv")) - - # ── Table 1: Category breakdown ── - cat("\n") - cat(strrep("=", 60), "\n") - cat(" Category Breakdown\n") - cat(strrep("=", 60), "\n") - - cat_counts <- samples |> - count(category) |> - mutate(pct = sprintf("%.1f%%", 100 * n / total)) - - cat_order <- tibble(category = ALL_CATEGORIES, ord = seq_along(ALL_CATEGORIES)) - cat_display <- cat_counts |> - left_join(cat_order, by = "category") |> - mutate(ord = if_else(is.na(ord), 999L, as.integer(ord))) |> - arrange(ord) |> - filter(n > 0) |> - select(category, n, pct) |> - bind_rows(tibble(category = "TOTAL", n = total, pct = "100.0%")) - - print_table(c("Category", "Samples", "%"), - cat_display |> mutate(n = as.character(n))) - cat("\n") - - write_csv(cat_display |> filter(category != "TOTAL"), - file.path(perf_dir, "category_summary.csv")) - - # ── Table 2: By R Package ── - display_cats <- intersect(ALL_CATEGORIES, unique(samples$category)) - - cat(strrep("=", 60), "\n") - cat(" By R Package\n") - cat(strrep("=", 60), "\n") - - pkg_summary <- samples |> - count(package, category) |> - pivot_wider(names_from = category, values_from = n, values_fill = 0) - - pkg_summary <- pkg_summary |> - mutate(Total = 
rowSums(across(where(is.numeric)))) |> - arrange(desc(Total)) - - for (cc in display_cats) { - if (!(cc %in% names(pkg_summary))) pkg_summary[[cc]] <- 0L - } - - pkg_out <- pkg_summary |> - select(Package = package, all_of(display_cats), Total) |> - mutate(across(where(is.numeric), ~ if_else(.x == 0, "", as.character(.x)))) - - print_table(names(pkg_out), pkg_out) - cat("\n") - - # ── Table 3: Top R Functions ── - cat(strrep("=", 60), "\n") - cat(sprintf(" Top %d R Functions (by total samples)\n", top_n)) - cat(strrep("=", 60), "\n") - - func_summary <- samples |> - mutate(jit_context = if_else(is.na(jit_context), "", jit_context)) |> - count(jit_context, category) |> - pivot_wider(names_from = category, values_from = n, values_fill = 0) - - func_summary <- func_summary |> - mutate(Total = rowSums(across(where(is.numeric)))) |> - arrange(desc(Total)) |> - slice_head(n = top_n) - - for (cc in display_cats) { - if (!(cc %in% names(func_summary))) func_summary[[cc]] <- 0L - } - - func_out <- func_summary |> - mutate(`%` = sprintf("%.1f%%", 100 * Total / total)) |> - select(`R Function` = jit_context, all_of(display_cats), Total, `%`) |> - mutate(across(where(is.numeric), ~ if_else(.x == 0, "", as.character(.x)))) - - print_table(names(func_out), func_out) - cat("\n") - - # ── Table 4: C-builtins detail ── - cbuiltins <- samples |> - filter(category == "c-builtins") |> - count(leaf_sym, sort = TRUE) |> - mutate(pct = sprintf("%.1f%%", 100 * n / sum(n))) - - if (nrow(cbuiltins) > 0) { - cat(strrep("=", 60), "\n") - cat(" C-Builtins Detail (leaf symbols)\n") - cat(strrep("=", 60), "\n") - cbuiltins_out <- cbuiltins |> - slice_head(n = top_n) |> - mutate(n = as.character(n)) - print_table(c("Symbol", "Samples", "%"), cbuiltins_out) - cat("\n") - - write_csv(cbuiltins, file.path(perf_dir, "cbuiltins_detail.csv")) - } - - # ── Table 5: Unmatched detail ── - unmatched <- samples |> - filter(category == "unmatched") |> - count(leaf_dso, leaf_sym, sort = TRUE) - - if 
(nrow(unmatched) > 0) { - cat(strrep("=", 60), "\n") - cat(" Unmatched Samples (unknown DSO/symbol)\n") - cat(strrep("=", 60), "\n") - unmatched_out <- unmatched |> - slice_head(n = top_n) |> - mutate(n = as.character(n)) - print_table(c("DSO", "Symbol", "Samples"), unmatched_out) - cat("\n") - } - - message("CSV files written to ", perf_dir) -} - -main() diff --git a/rcp/inst/perf/harness.R b/rcp/inst/perf/harness.R deleted file mode 100644 index 4a1d5cb..0000000 --- a/rcp/inst/perf/harness.R +++ /dev/null @@ -1,61 +0,0 @@ -# Benchmark harness for run-perf.sh -# Sourced into its own environment to avoid polluting the global namespace. - -harness <- local({ - - # Source a benchmark file, handling setwd so that relative source() calls - # (e.g. source('random.r')) resolve correctly. - load_benchmark <- function(bench_file) { - bench_dir <- dirname(bench_file) - old_wd <- setwd(bench_dir) - on.exit(setwd(old_wd)) - source(basename(bench_file), local = FALSE) - } - - # Run execute(param) `iterations` times and return elapsed times as a vector. - run_benchmark <- function(param, iterations) { - times <- numeric(iterations) - for (i in seq_len(iterations)) { - times[i] <- system.time(execute(param))[["elapsed"]] - } - times - } - - # Get the default parameter for a benchmark's execute() function. - # Returns NA if execute() has no default (e.g. `unused`). - get_default_param <- function() { - f <- formals(execute) - if (length(f) == 0) return(NA) - # A missing default is stored as a symbol; a real default is numeric/etc. - if (typeof(f[[1]]) == "symbol") NA else f[[1]] - } - - # Compile all closures in the global environment using rcp_cmpfun. - # Returns a list with counts of compiled and failed functions. 
- compile_global_funs <- function() { - env <- globalenv() - nms <- ls(envir = env) - compiled <- 0L - failed <- 0L - for (nm in nms) { - obj <- get(nm, envir = env) - if (is.function(obj)) { - tryCatch({ - print(nm) - assign(nm, rcp_cmpfun(obj, list(name = nm, optimize = 3)), envir = env) - compiled <- compiled + 1L - }, error = function(e) { - failed <<- failed + 1L - }) - } - } - list(compiled = compiled, failed = failed) - } - - # Write timing results to a CSV file. - write_times <- function(times, output_file) { - write.csv(data.frame(time = times), output_file, row.names = FALSE) - } - - environment() -}) diff --git a/rcp/inst/perf/run-perf.sh b/rcp/inst/perf/run-perf.sh deleted file mode 100755 index aab2269..0000000 --- a/rcp/inst/perf/run-perf.sh +++ /dev/null @@ -1,301 +0,0 @@ -#!/usr/bin/env bash -# -# Run areWeFast benchmarks under different profiling modes. -# -# Usage: run-perf.sh [options] [benchmark ...] -# --iterations N Number of runs per benchmark (default: 15) -# --stack-size N DWARF stack size for perf (default: 16384) -# --frequency N Sampling frequency in Hz for both perf and Rprof (default: 99) -# --output DIR Output directory (default: results/) -# --mode MODE,... Comma-separated modes to run (default: all) -# Available: vanilla, vanilla-rprof, rcp, rcp-perf -# --vanilla-r PATH Path to vanilla R bin/ dir (default: /mnt/data-1/krikava/R-4.3.2/bin) -# --project-r PATH Path to project R bin/ dir (default: auto-detect from common.mk) -# If no benchmarks specified, runs all areWeFast benchmarks. - -set -euo pipefail - -# --------------------------------------------------------------------------- -# Defaults -# --------------------------------------------------------------------------- -ITERATIONS=15 -STACK_SIZE=32768 -FREQUENCY=99 -OUTPUT_DIR="results-$FREQUENCY-$STACK_SIZE" -VANILLA_R_DIR="/mnt/data-1/krikava/R-4.3.2/bin" -PROJECT_R_DIR="" -USER_MODES="" - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -ROOT_DIR="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" -HELPER="$SCRIPT_DIR/harness.R" -BENCH_DIR="$(cd "$ROOT_DIR/../external/rsh/client/rsh/inst/benchmarks/areWeFast" && pwd)" - -ALL_BENCHMARKS=(bounce bounce_nonames bounce_nonames_simple mandelbrot storage towers queens sieve) - -# --------------------------------------------------------------------------- -# Parse arguments -# --------------------------------------------------------------------------- -BENCHMARKS=() - -while [[ $# -gt 0 ]]; do - case "$1" in - --iterations) - ITERATIONS="$2" - shift 2 - ;; - --stack-size) - STACK_SIZE="$2" - shift 2 - ;; - --frequency) - FREQUENCY="$2" - shift 2 - ;; - --output) - OUTPUT_DIR="$2" - shift 2 - ;; - --mode) - USER_MODES="$2" - shift 2 - ;; - --vanilla-r) - VANILLA_R_DIR="$2" - shift 2 - ;; - --project-r) - PROJECT_R_DIR="$2" - shift 2 - ;; - --help | -h) - sed -n '3,12p' "$0" | sed 's/^# *//' - exit 0 - ;; - *) - BENCHMARKS+=("$1") - shift - ;; - esac -done - -if [[ ${#BENCHMARKS[@]} -eq 0 ]]; then - BENCHMARKS=("${ALL_BENCHMARKS[@]}") -fi - -# --------------------------------------------------------------------------- -# Resolve modes -# --------------------------------------------------------------------------- -ALL_MODES=(vanilla vanilla-rprof rcp rcp-perf) - -if [[ -n "$USER_MODES" ]]; then - IFS=',' read -ra MODES <<<"$USER_MODES" - for m in "${MODES[@]}"; do - valid=0 - for a in "${ALL_MODES[@]}"; do [[ "$m" == "$a" ]] && valid=1; done - if [[ $valid -eq 0 ]]; then - echo "Error: unknown mode '$m'. Available: ${ALL_MODES[*]}" >&2 - exit 1 - fi - done -else - MODES=("${ALL_MODES[@]}") -fi - -# --------------------------------------------------------------------------- -# Resolve R binaries -# --------------------------------------------------------------------------- -needs_vanilla=0 -needs_project=0 -for m in "${MODES[@]}"; do - case "$m" in vanilla | vanilla-rprof) needs_vanilla=1 ;; rcp | rcp-perf) needs_project=1 ;; esac -done - -VANILLA_R="$VANILLA_R_DIR/R" -if [[ $needs_vanilla -eq 1 ]] && [[ ! 
-x "$VANILLA_R" ]]; then - echo "Error: vanilla R not found at $VANILLA_R" >&2 - exit 1 -fi - -if [[ -z "$PROJECT_R_DIR" ]]; then - R_HOME="$(cd "$ROOT_DIR/../external/rsh/external/R" && pwd)" - PROJECT_R_DIR="$R_HOME/bin" -fi -PROJECT_R="$PROJECT_R_DIR/R" -if [[ $needs_project -eq 1 ]] && [[ ! -x "$PROJECT_R" ]]; then - echo "Error: project R not found at $PROJECT_R" >&2 - exit 1 -fi - -echo "Configuration:" -echo " Vanilla R: $VANILLA_R" -echo " Project R: $PROJECT_R" -echo " Iterations: $ITERATIONS" -echo " Stack size: $STACK_SIZE" -echo " Frequency: ${FREQUENCY} Hz" -echo " Output: $OUTPUT_DIR" -echo " Modes: ${MODES[*]}" -echo " Benchmarks: ${BENCHMARKS[*]}" -echo "" - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -# Collect all results for summary table: arrays of "bench mode median" -declare -a SUMMARY_ROWS=() - -median_from_csv() { - local csv="$1" - "$VANILLA_R" --slave -e " - d <- read.csv('$csv') - cat(median(d\$time)) - " -} - -run_mode() { - local bench="$1" mode="$2" - local bench_file="$BENCH_DIR/${bench}.R" - local out="$OUTPUT_DIR/$bench" - - echo " [$mode]" - - case "$mode" in - # ------------------------------------------------------------------- - vanilla) - "$VANILLA_R" --slave --no-restore -e " - source('$HELPER') - harness\$load_benchmark('$bench_file') - param <- harness\$get_default_param() - times <- harness\$run_benchmark(param, ${ITERATIONS}) - harness\$write_times(times, '$out/vanilla.csv') - " - ;; - - # ------------------------------------------------------------------- - vanilla-rprof) - mkdir -p "$out/vanilla-rprof" - "$VANILLA_R" --slave --no-restore -e " - source('$HELPER') - harness\$load_benchmark('$bench_file') - param <- harness\$get_default_param() - Rprof('$out/vanilla-rprof/Rprof.out', interval=1/${FREQUENCY}, memory.profiling=T, gc.profiling=T) - times <- harness\$run_benchmark(param, ${ITERATIONS}) 
- Rprof(NULL) - harness\$write_times(times, '$out/vanilla-rprof.csv') - " - ;; - - # ------------------------------------------------------------------- - rcp) - "$PROJECT_R" --slave --no-restore -e " - library(rcp) - rcp_cmppkg('base') - source('$HELPER') - harness\$load_benchmark('$bench_file') - param <- harness\$get_default_param() - print(harness\$compile_global_funs()) - times <- harness\$run_benchmark(param, ${ITERATIONS}) - harness\$write_times(times, '$out/rcp.csv') - " - ;; - - # ------------------------------------------------------------------- - rcp-perf) - mkdir -p "$out/rcp-perf" - - RCP_PERF_JIT=1 \ - perf record -F "$FREQUENCY" -g --call-graph "dwarf,$STACK_SIZE" -k1 -e cpu-cycles:u \ - -o "$out/rcp-perf/perf.data" \ - -- "$PROJECT_R" --slave --no-restore -e " - library(rcp) - rcp_cmppkg('base') - source('$HELPER') - harness\$load_benchmark('$bench_file') - param <- harness\$get_default_param() - print(harness\$compile_global_funs()) - times <- harness\$run_benchmark(param, ${ITERATIONS}) - harness\$write_times(times, '$out/rcp-perf.csv') - " - - echo " Injecting JIT symbols..." - perf inject --jit \ - -i "$out/rcp-perf/perf.data" \ - -o "$out/rcp-perf/perf.jit.data" - - echo " Exporting perf script to CSV..." - perf script -i "$out/rcp-perf/perf.jit.data" 2>/dev/null | awk ' -BEGIN { OFS=","; print "sample_id,timestamp,frame_pos,sym,dso"; sid=0 } -/^[^ \t]/ { - sid++ - # Extract timestamp: matches digits.digits followed by colon - ts = "" - for (i = 1; i <= NF; i++) { - if ($i ~ /^[0-9]+\.[0-9]+:$/) { ts = $i; sub(/:$/, "", ts); break } - } - fpos = 0 - next -} -/^\s+[0-9a-f]+ / { - fpos++ - # Parse: addr sym+0xoffset (dso) - addr = $1 - # DSO is last field in parens - dso = $NF; gsub(/[()]/, "", dso) - # Symbol is everything between addr and (dso), minus offset - sym = "" - for (i = 2; i < NF; i++) sym = (sym == "" ? 
$i : sym " " $i) - sub(/\+0x[0-9a-fA-F]+$/, "", sym) - # CSV-escape fields that might contain commas or quotes - gsub(/"/, "\"\"", sym) - gsub(/"/, "\"\"", dso) - print sid, ts, fpos, "\"" sym "\"", "\"" dso "\"" -} -' >"$out/rcp-perf/frames.csv" - ;; - esac -} - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- -for bench in "${BENCHMARKS[@]}"; do - bench_file="$BENCH_DIR/${bench}.R" - if [[ ! -f "$bench_file" ]]; then - echo "Warning: benchmark $bench_file not found, skipping" >&2 - continue - fi - - echo "=== $bench ===" - mkdir -p "$OUTPUT_DIR/$bench" - - for mode in "${MODES[@]}"; do - run_mode "$bench" "$mode" - - # Record result for summary - csv="$OUTPUT_DIR/$bench/${mode}.csv" - if [[ "$mode" == "vanilla-rprof" ]]; then - csv="$OUTPUT_DIR/$bench/vanilla-rprof.csv" - fi - if [[ -f "$csv" ]]; then - med=$(median_from_csv "$csv") - SUMMARY_ROWS+=("$bench $mode $med") - fi - done - echo "" -done - -# --------------------------------------------------------------------------- -# Summary -# --------------------------------------------------------------------------- -echo "========================================" -echo " Summary (median elapsed time, seconds)" -echo "========================================" -printf "%-30s %-16s %s\n" "Benchmark" "Mode" "Median(s)" -printf "%-30s %-16s %s\n" "------------------------------" "----------------" "--------" -for row in "${SUMMARY_ROWS[@]}"; do - read -r bench mode med <<<"$row" - printf "%-30s %-16s %s\n" "$bench" "$mode" "$med" -done -echo "" -echo "Done." 
diff --git a/rcp/src/stencils/Makefile b/rcp/src/stencils/Makefile index d2b9894..ecfe299 100644 --- a/rcp/src/stencils/Makefile +++ b/rcp/src/stencils/Makefile @@ -49,7 +49,9 @@ OBJ = $(notdir $(STENCILS_OBJ)) .PHONY: all all: $(OBJ) -$(OBJ): $(OBJ:.o=.c) +RUNTIME_H := $(RSH_HOME)/src/bc2c/runtime.h + +$(OBJ): $(OBJ:.o=.c) $(RUNTIME_H) $(CC) $(CFLAGS) -c $< -o $@ .PHONY: clean diff --git a/rcp/tests/Makefile b/rcp/tests/Makefile index 33ce6e5..dee26df 100644 --- a/rcp/tests/Makefile +++ b/rcp/tests/Makefile @@ -1,4 +1,4 @@ -SUBDIRS = smoketest benchmarks types perf gdb-jit exceptions +SUBDIRS = smoketest benchmarks types gdb-jit exceptions .PHONY: all test clean $(SUBDIRS) diff --git a/rcp/tests/gdb-jit/gdb-next/expected.log b/rcp/tests/gdb-jit/gdb-next/expected.log index 63a4323..1b36224 100644 --- a/rcp/tests/gdb-jit/gdb-next/expected.log +++ b/rcp/tests/gdb-jit/gdb-next/expected.log @@ -16,7 +16,7 @@ Compiling minimal function... Breakpoint 1, __jit_debug_register_code () at gdb_jit.c:XXX 53 { gdb_jit_register (func_name=func_name@entry=0xADDR "f_jit", code_addr=code_addr@entry=0xADDR , code_size=code_size@entry=1848, inst_addrs=inst_addrs@entry=0xADDR, instruction_count=instruction_count@entry=4, stencils=stencils@entry=0xADDR) at gdb_jit.c:XXX -871 __jit_debug_descriptor.action_flag = JIT_NOACTION; +898 __jit_debug_descriptor.action_flag = JIT_NOACTION; Breakpoint 2 at 0xADDR: file /tmp/rcp_jit_XXXXXX/f_jit.S, line 1. > > cat("Executing function...\n") @@ -47,3 +47,4 @@ Stack Top after LDCONST (should be 1):dbl: 1.000000 4 RETURN_OP_ Stack Top after ADD (should be 11):dbl: 11.000000 Function "__jit_debug_register_code" not defined. +rcp 1.0.0.0 (9b10bf4) [gdb] diff --git a/rcp/tests/gdb-jit/gdb-recursion/expected.log b/rcp/tests/gdb-jit/gdb-recursion/expected.log index 26e3551..44822a5 100644 --- a/rcp/tests/gdb-jit/gdb-recursion/expected.log +++ b/rcp/tests/gdb-jit/gdb-recursion/expected.log @@ -16,8 +16,8 @@ Compiling recursive function... 
Breakpoint 1, __jit_debug_register_code () at gdb_jit.c:XXX 53 { -gdb_jit_register (func_name=func_name@entry=0xADDR "fac", code_addr=code_addr@entry=0xADDR , code_size=code_size@entry=4146, inst_addrs=inst_addrs@entry=0xADDR, instruction_count=instruction_count@entry=12, stencils=stencils@entry=0xADDR) at gdb_jit.c:XXX -871 __jit_debug_descriptor.action_flag = JIT_NOACTION; +gdb_jit_register (func_name=func_name@entry=0xADDR "fac", code_addr=code_addr@entry=0xADDR , code_size=code_size@entry=4129, inst_addrs=inst_addrs@entry=0xADDR, instruction_count=instruction_count@entry=12, stencils=stencils@entry=0xADDR) at gdb_jit.c:XXX +898 __jit_debug_descriptor.action_flag = JIT_NOACTION; Breakpoint 2 at 0xADDR: file /tmp/rcp_jit_XXXXXX/fac.S, line 1. > > cat("Executing recursive function...\n") @@ -97,3 +97,4 @@ Result: 6 Breakpoint 1, __jit_debug_register_code () at gdb_jit.c:XXX 53 { Function "__jit_debug_register_code" not defined. +rcp 1.0.0.0 (9b10bf4) [gdb] diff --git a/rcp/tests/perf/Makefile b/rcp/tests/perf/Makefile deleted file mode 100644 index e69de29..0000000 From 43a3221825f4be35a770adfd94f965ad65bac08c Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Fri, 20 Mar 2026 13:00:44 +0100 Subject: [PATCH 12/17] Update makefile so it keeps flags --- rcp/Makefile | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/rcp/Makefile b/rcp/Makefile index 37a997d..b13b4e1 100644 --- a/rcp/Makefile +++ b/rcp/Makefile @@ -21,12 +21,20 @@ check-toolchain: all: install STAMP := .install-stamp +STAMP_CFG := .install-stamp.cfg + +# Config variables that should trigger a reinstall when changed +CUR_CFG := RCP_COMPILE_PROMISES=$(RCP_COMPILE_PROMISES) RELOC_MODEL=$(RELOC_MODEL) DEBUG=$(DEBUG) OPENMP=$(OPENMP) SPECIALIZE_STEPFOR=$(SPECIALIZE_STEPFOR) SPECIALIZE_SWITCH=$(SPECIALIZE_SWITCH) SPECIALIZE_MAKEPROM=$(SPECIALIZE_MAKEPROM) ALIGN_INSTRUCTIONS=$(ALIGN_INSTRUCTIONS) .PHONY: install install: check-toolchain - @if [ ! 
-f $(STAMP) ] || \ - [ -n "$$(find src R DESCRIPTION NAMESPACE common.mk -newer $(STAMP) 2>/dev/null | head -1)" ]; then \ - MAKEFLAGS="$(MAKEFLAGS) CC=$(CC) CXX=$(CXX)" $(R) CMD INSTALL . && touch $(STAMP); \ + @config_changed=0; \ + if [ ! -f $(STAMP_CFG) ] || [ "$$(cat $(STAMP_CFG))" != "$(CUR_CFG)" ]; then \ + config_changed=1; \ + fi; \ + if [ ! -f $(STAMP) ] || [ "$$config_changed" = "1" ] || \ + [ -n "$$(find src R DESCRIPTION NAMESPACE common.mk $(RSH_HOME)/src/bc2c/runtime.h -newer $(STAMP) 2>/dev/null | head -1)" ]; then \ + MAKEFLAGS="$(MAKEFLAGS) CC=$(CC) CXX=$(CXX)" $(R) CMD INSTALL . && touch $(STAMP) && echo "$(CUR_CFG)" > $(STAMP_CFG); \ else \ echo "Nothing changed, skipping install."; \ fi @@ -34,7 +42,7 @@ install: check-toolchain .PHONY: clean clean: $(MAKE) -C src -f Makevars clean - rm -f $(STAMP) + rm -f $(STAMP) $(STAMP_CFG) .PHONY: test test: check-toolchain install From 4fc95b4119b3e0c065b2f16698ca1aee22b4a64c Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Fri, 20 Mar 2026 14:21:13 +0100 Subject: [PATCH 13/17] Remove list-tests.sh --- rcp/tests/list-tests.sh | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 rcp/tests/list-tests.sh diff --git a/rcp/tests/list-tests.sh b/rcp/tests/list-tests.sh deleted file mode 100644 index 3a54bb6..0000000 --- a/rcp/tests/list-tests.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -# Discover all test subdirectories -# Output: space-separated list of test directory names -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -ls -d "$SCRIPT_DIR"/*/ 2>/dev/null | xargs -n1 basename | sort | tr '\n' ' ' | sed 's/ $//' From 56e5ba1f0ae8c1054e541dd6666f6bf4ec6d143c Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Fri, 20 Mar 2026 22:12:00 +0100 Subject: [PATCH 14/17] Fix tests with promise compilation --- rcp/src/compile.c | 1 + 1 file changed, 1 insertion(+) diff --git a/rcp/src/compile.c b/rcp/src/compile.c index 37c1986..795b59b 100644 --- a/rcp/src/compile.c +++ b/rcp/src/compile.c @@ -767,6 +767,7 @@ 
static int can_fallthrough_from_opcode(RCP_BC_OPCODES opcode) switch (opcode) { case (RETURN_BCOP): + case (RETURNJMP_BCOP): case (GOTO_BCOP): case (STARTFOR_BCOP): case (SWITCH_BCOP): From 673572bbcbd42dfae13cf06610e4e4122f4229b6 Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Fri, 20 Mar 2026 22:12:10 +0100 Subject: [PATCH 15/17] Simplify makefile 1 --- rcp/Makefile | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/rcp/Makefile b/rcp/Makefile index e8c7ad1..b84f6c8 100644 --- a/rcp/Makefile +++ b/rcp/Makefile @@ -1,23 +1,4 @@ include common.mk -.PHONY: all -all: install - -.PHONY: check-toolchain -check-toolchain: - @command -v $(CC) >/dev/null 2>&1 || (echo "Missing compiler: $(CC)"; exit 1) - @command -v $(CXX) >/dev/null 2>&1 || (echo "Missing compiler: $(CXX)"; exit 1) - @cc_ver=`$(CC) -dumpfullversion -dumpversion | cut -d. -f1`; \ - if [ "$$cc_ver" != "14" ]; then \ - echo "Expected GCC major version 14 for $(CC), got $$cc_ver"; \ - exit 1; \ - fi - @cxx_ver=`$(CXX) -dumpfullversion -dumpversion | cut -d. -f1`; \ - if [ "$$cxx_ver" != "14" ]; then \ - echo "Expected G++ major version 14 for $(CXX), got $$cxx_ver"; \ - exit 1; \ - fi - @printf 'int main(void){return 0;}\n' | $(CC) $(C_STD_FLAG) -x c -fsyntax-only - - @printf '#include \nint main(){return 0;}\n' | $(CXX) $(CXX_STD_FLAG) -x c++ -fsyntax-only - .PHONY: all all: install @@ -29,7 +10,7 @@ STAMP_CFG := .install-stamp.cfg CUR_CFG := RCP_COMPILE_PROMISES=$(RCP_COMPILE_PROMISES) RELOC_MODEL=$(RELOC_MODEL) DEBUG=$(DEBUG) OPENMP=$(OPENMP) SPECIALIZE_STEPFOR=$(SPECIALIZE_STEPFOR) SPECIALIZE_SWITCH=$(SPECIALIZE_SWITCH) SPECIALIZE_MAKEPROM=$(SPECIALIZE_MAKEPROM) ALIGN_INSTRUCTIONS=$(ALIGN_INSTRUCTIONS) .PHONY: install -install: check-toolchain +install: @config_changed=0; \ if [ ! 
-f $(STAMP_CFG) ] || [ "$$(cat $(STAMP_CFG))" != "$(CUR_CFG)" ]; then \ config_changed=1; \ @@ -47,8 +28,7 @@ clean: rm -f $(STAMP) $(STAMP_CFG) .PHONY: test -test: check-toolchain - $(MAKE) clean && $(MAKE) install DEBUG=1 RCP_COMPILE_PROMISES=1 +test: install $(MAKE) -C tests test; \ status=$$?; \ $(MAKE) clean \ From 84bff507cae3d3465c300c88ab77f91bff761a0e Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Sat, 21 Mar 2026 11:48:39 +0100 Subject: [PATCH 16/17] Fix build and tests --- rcp/Makefile | 42 +- rcp/tests/gdb-jit/Makefile | 6 +- rcp/tests/gdb-jit/gdb-next/expected.log | 50 --- rcp/tests/gdb-jit/gdb-next/test.gdb | 8 +- rcp/tests/gdb-jit/gdb-recursion/expected.log | 100 ----- rcp/tests/gdb-jit/gdb-recursion/test.gdb | 9 +- rcp/tests/gdb-jit/run-gdb-tests.py | 396 ++++--------------- 7 files changed, 93 insertions(+), 518 deletions(-) delete mode 100644 rcp/tests/gdb-jit/gdb-next/expected.log delete mode 100644 rcp/tests/gdb-jit/gdb-recursion/expected.log mode change 100755 => 100644 rcp/tests/gdb-jit/run-gdb-tests.py diff --git a/rcp/Makefile b/rcp/Makefile index b84f6c8..d0b18b2 100644 --- a/rcp/Makefile +++ b/rcp/Makefile @@ -3,42 +3,21 @@ include common.mk .PHONY: all all: install -STAMP := .install-stamp -STAMP_CFG := .install-stamp.cfg - -# Config variables that should trigger a reinstall when changed -CUR_CFG := RCP_COMPILE_PROMISES=$(RCP_COMPILE_PROMISES) RELOC_MODEL=$(RELOC_MODEL) DEBUG=$(DEBUG) OPENMP=$(OPENMP) SPECIALIZE_STEPFOR=$(SPECIALIZE_STEPFOR) SPECIALIZE_SWITCH=$(SPECIALIZE_SWITCH) SPECIALIZE_MAKEPROM=$(SPECIALIZE_MAKEPROM) ALIGN_INSTRUCTIONS=$(ALIGN_INSTRUCTIONS) - .PHONY: install +BEAR := $(shell command -v bear 2>/dev/null) +BEAR := $(if $(BEAR),$(BEAR) --) + install: - @config_changed=0; \ - if [ ! -f $(STAMP_CFG) ] || [ "$$(cat $(STAMP_CFG))" != "$(CUR_CFG)" ]; then \ - config_changed=1; \ - fi; \ - if [ ! 
-f $(STAMP) ] || [ "$$config_changed" = "1" ] || \ - [ -n "$$(find src R DESCRIPTION NAMESPACE common.mk $(RSH_HOME)/src/bc2c/runtime.h -newer $(STAMP) 2>/dev/null | head -1)" ]; then \ - MAKEFLAGS="$(MAKEFLAGS) CC=$(CC) CXX=$(CXX)" $(R) CMD INSTALL . && touch $(STAMP) && echo "$(CUR_CFG)" > $(STAMP_CFG); \ - else \ - echo "Nothing changed, skipping install."; \ - fi + $(BEAR) $(R) CMD INSTALL . .PHONY: clean clean: $(MAKE) -C src -f Makevars clean - rm -f $(STAMP) $(STAMP_CFG) + rm -f compile_commands.json .PHONY: test test: install - $(MAKE) -C tests test; \ - status=$$?; \ - $(MAKE) clean \ - exit $$status - -.PHONY: test-functional -test-functional: install - $(MAKE) -C tests/smoketest test - $(MAKE) -C tests/types test - $(MAKE) -C tests/gdb-jit test + $(MAKE) -C tests test .PHONY: run run: install @@ -52,20 +31,13 @@ debug: install format: find src -type f \( -name "*.c" -o -name "*.h" -o -name "*.cpp" \) -exec clang-format -i {} + -.PHONY: setup -setup: check-toolchain - $(R) --quiet -e 'install.packages("microbenchmark", repos="https://cloud.r-project.org")' - BENCH_ITER ?= 15 BENCH_PARALLEL ?= 1 BENCH_OUT_DIR ?= .PHONY: benchmark -benchmark: check-toolchain +benchmark: @$(call ensure_microbenchmark_installed) @RSH_HOME=$(RSH_HOME) R_HOME=$(R_HOME) ./inst/benchmarks/run-benchmarks.sh \ --runs $(BENCH_ITER) --parallel $(BENCH_PARALLEL) \ $(if $(BENCH_OUT_DIR),--output $(BENCH_OUT_DIR)) - -compile_commands.json: - bear -- $(MAKE) clean install diff --git a/rcp/tests/gdb-jit/Makefile b/rcp/tests/gdb-jit/Makefile index 9ccda08..92d8eaa 100644 --- a/rcp/tests/gdb-jit/Makefile +++ b/rcp/tests/gdb-jit/Makefile @@ -2,18 +2,14 @@ include ../../common.mk SUBDIRS = gdb-next gdb-recursion -.PHONY: all test clean re-record +.PHONY: all test clean all: test test: @RCP_GDB_JIT=1 R_HOME=$(R_HOME) python3 ./run-gdb-tests.py $(SUBDIRS) -re-record: - @RCP_GDB_JIT=1 R_HOME=$(R_HOME) python3 ./run-gdb-tests.py --update $(SUBDIRS) - clean: for dir in $(SUBDIRS); do \ rm -f 
$$dir/actual.log; \ done - diff --git a/rcp/tests/gdb-jit/gdb-next/expected.log b/rcp/tests/gdb-jit/gdb-next/expected.log deleted file mode 100644 index 1b36224..0000000 --- a/rcp/tests/gdb-jit/gdb-next/expected.log +++ /dev/null @@ -1,50 +0,0 @@ -Breakpoint 1 (__jit_debug_register_code) pending. -> library(rcp) -> -> # Minimal function: f(x) = x + 1 -> # Expected Bytecode: -> # 1. GETVAR x -> # 2. LDCONST 1 -> # 3. ADD -> # 4. RETURN -> f <- function(x) x + 1 -> -> cat("Compiling minimal function...\n") -Compiling minimal function... -> f_jit <- rcp::rcp_cmpfun(f, list(name="f_jit")) - -Breakpoint 1, __jit_debug_register_code () at gdb_jit.c:XXX -53 { -gdb_jit_register (func_name=func_name@entry=0xADDR "f_jit", code_addr=code_addr@entry=0xADDR , code_size=code_size@entry=1848, inst_addrs=inst_addrs@entry=0xADDR, instruction_count=instruction_count@entry=4, stencils=stencils@entry=0xADDR) at gdb_jit.c:XXX -898 __jit_debug_descriptor.action_flag = JIT_NOACTION; -Breakpoint 2 at 0xADDR: file /tmp/rcp_jit_XXXXXX/f_jit.S, line 1. -> -> cat("Executing function...\n") -Executing function... 
-> res <- f_jit(10) - -Breakpoint 2, f_jit (stack=0xADDR, stack@entry=, locals=0xADDR, locals@entry=) at /tmp/rcp_jit_XXXXXX/f_jit.S:1 -1 GETVAR_OP_ -#0 f_jit (stack=0xADDR, stack@entry=, locals=0xADDR, locals@entry=) at /tmp/rcp_jit_XXXXXX/f_jit.S:1 -#1 0xADDR in rcpNativeCaller (stack=stack@entry=0xADDR, locals=, call=) at eval.c:XXX -#2 0xADDR in rcpEval (body=body@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#3 0xADDR in Rf_eval (e=e@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#4 0xADDR in R_execClosure (call=call@entry=0xADDR, newrho=newrho@entry=0xADDR, sysparent=, rho=rho@entry=0xADDR, arglist=arglist@entry=0xADDR, op=op@entry=0xADDR) at eval.c:XXX -#5 0xADDR in applyClosure_core (call=call@entry=0xADDR, op=op@entry=0xADDR, arglist=0xADDR, rho=rho@entry=0xADDR, suppliedvars=, unpromise=unpromise@entry=TRUE) at eval.c:XXX -#6 0xADDR in Rf_applyClosure (call=0xADDR, op=0xADDR, arglist=, rho=0xADDR, suppliedvars=, unpromise=TRUE) at eval.c:XXX -#7 Rf_eval (e=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#8 0xADDR in do_set (call=0xADDR, op=0xADDR, args=0xADDR, rho=0xADDR) at Rinlinedfuns.h:XXX -#9 0xADDR in Rf_eval (e=e@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#10 0xADDR in Rf_ReplIteration (rho=rho@entry=0xADDR, savestack=savestack@entry=0, browselevel=browselevel@entry=0, state=state@entry=0xADDR) at main.c:XXX -#11 0xADDR in R_ReplConsole (rho=0xADDR, savestack=savestack@entry=0, browselevel=browselevel@entry=0) at main.c:XXX -#12 0xADDR in run_Rmainloop () at main.c:XXX -#13 0xADDR in Rf_mainloop () at main.c:XXX -#14 0xADDR in main (ac=, av=) at Rmain.c:XXX -2 LDCONST_OP_DBL -Stack Top after GETVAR (should be 10):dbl: 10.000000 -3 ADD_OP_ -Stack Top after LDCONST (should be 1):dbl: 1.000000 -4 RETURN_OP_ -Stack Top after ADD (should be 11):dbl: 11.000000 -Function "__jit_debug_register_code" not defined. 
-rcp 1.0.0.0 (9b10bf4) [gdb] diff --git a/rcp/tests/gdb-jit/gdb-next/test.gdb b/rcp/tests/gdb-jit/gdb-next/test.gdb index 967d891..74c5d66 100644 --- a/rcp/tests/gdb-jit/gdb-next/test.gdb +++ b/rcp/tests/gdb-jit/gdb-next/test.gdb @@ -11,21 +11,23 @@ break f_jit continue # Line 1 (GETVAR) - we just hit the breakpoint +echo ===BT1_START===\n bt +echo ===BT1_END===\n next # Line 2 (LDCONST) - stepped over GETVAR -echo Stack Top after GETVAR (should be 10): +echo Stack Top after GETVAR (should be 10):\n call rcp_print_stack_val((void*)((char*)stack - 16)) next # Line 3 (ADD) - stepped over LDCONST -echo Stack Top after LDCONST (should be 1): +echo Stack Top after LDCONST (should be 1):\n call rcp_print_stack_val((void*)((char*)stack - 16)) next # Line 4 (RETURN) - stepped over ADD -echo Stack Top after ADD (should be 11): +echo Stack Top after ADD (should be 11):\n call rcp_print_stack_val((void*)((char*)stack - 16)) quit diff --git a/rcp/tests/gdb-jit/gdb-recursion/expected.log b/rcp/tests/gdb-jit/gdb-recursion/expected.log deleted file mode 100644 index 44822a5..0000000 --- a/rcp/tests/gdb-jit/gdb-recursion/expected.log +++ /dev/null @@ -1,100 +0,0 @@ -Breakpoint 1 (__jit_debug_register_code) pending. -> library(rcp) -> -> # Factorial: fac(3) -> fac(2) -> fac(1) -> returns 6 -> fac <- function(x) { -+ if (x <= 1) { -+ return(1) -+ } else { -+ return(x * fac(x - 1)) -+ } -+ } -> -> cat("Compiling recursive function...\n") -Compiling recursive function... -> fac <- rcp::rcp_cmpfun(fac, list(name="fac")) - -Breakpoint 1, __jit_debug_register_code () at gdb_jit.c:XXX -53 { -gdb_jit_register (func_name=func_name@entry=0xADDR "fac", code_addr=code_addr@entry=0xADDR , code_size=code_size@entry=4129, inst_addrs=inst_addrs@entry=0xADDR, instruction_count=instruction_count@entry=12, stencils=stencils@entry=0xADDR) at gdb_jit.c:XXX -898 __jit_debug_descriptor.action_flag = JIT_NOACTION; -Breakpoint 2 at 0xADDR: file /tmp/rcp_jit_XXXXXX/fac.S, line 1. 
-> -> cat("Executing recursive function...\n") -Executing recursive function... -> res <- fac(3) - -Breakpoint 2, fac (stack=0xADDR, stack@entry=, locals=0xADDR, locals@entry=) at /tmp/rcp_jit_XXXXXX/fac.S:1 -1 GETVAR_OP_ -[GDB] Hit fac (1st call). Backtrace:#0 fac (stack=0xADDR, stack@entry=, locals=0xADDR, locals@entry=) at /tmp/rcp_jit_XXXXXX/fac.S:1 -#1 0xADDR in rcpNativeCaller (stack=stack@entry=0xADDR, locals=, call=) at eval.c:XXX -#2 0xADDR in rcpEval (body=body@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#3 0xADDR in Rf_eval (e=e@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#4 0xADDR in R_execClosure (call=call@entry=0xADDR, newrho=newrho@entry=0xADDR, sysparent=, rho=rho@entry=0xADDR, arglist=arglist@entry=0xADDR, op=op@entry=0xADDR) at eval.c:XXX -#5 0xADDR in applyClosure_core (call=call@entry=0xADDR, op=op@entry=0xADDR, arglist=0xADDR, rho=rho@entry=0xADDR, suppliedvars=, unpromise=unpromise@entry=TRUE) at eval.c:XXX -#6 0xADDR in Rf_applyClosure (call=0xADDR, op=0xADDR, arglist=, rho=0xADDR, suppliedvars=, unpromise=TRUE) at eval.c:XXX -#7 Rf_eval (e=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#8 0xADDR in do_set (call=0xADDR, op=0xADDR, args=0xADDR, rho=0xADDR) at Rinlinedfuns.h:XXX -#9 0xADDR in Rf_eval (e=e@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#10 0xADDR in Rf_ReplIteration (rho=rho@entry=0xADDR, savestack=savestack@entry=0, browselevel=browselevel@entry=0, state=state@entry=0xADDR) at main.c:XXX -#11 0xADDR in R_ReplConsole (rho=0xADDR, savestack=savestack@entry=0, browselevel=browselevel@entry=0) at main.c:XXX -#12 0xADDR in run_Rmainloop () at main.c:XXX -#13 0xADDR in Rf_mainloop () at main.c:XXX -#14 0xADDR in main (ac=, av=) at Rmain.c:XXX - -Breakpoint 2, fac (stack=0xADDR, stack@entry=, locals=0xADDR, locals@entry=) at /tmp/rcp_jit_XXXXXX/fac.S:1 -1 GETVAR_OP_ -[GDB] Hit fac (2nd call - recursive). 
Backtrace:#0 fac (stack=0xADDR, stack@entry=, locals=0xADDR, locals@entry=) at /tmp/rcp_jit_XXXXXX/fac.S:1 -#1 0xADDR in rcpNativeCaller (stack=stack@entry=0xADDR, locals=, call=) at eval.c:XXX -#2 0xADDR in rcpEval (body=body@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#3 0xADDR in Rsh_Call (stack=0xADDR, call=0xADDR, rho=0xADDR) at runtime.h:XXX -#4 0xADDR in fac (stack=0xADDR, stack@entry=, locals=0xADDR, locals@entry=) at /tmp/rcp_jit_XXXXXX/fac.S:10 -#5 0xADDR in rcpNativeCaller (stack=stack@entry=0xADDR, locals=, call=) at eval.c:XXX -#6 0xADDR in rcpEval (body=body@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#7 0xADDR in Rf_eval (e=e@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#8 0xADDR in R_execClosure (call=call@entry=0xADDR, newrho=newrho@entry=0xADDR, sysparent=, rho=rho@entry=0xADDR, arglist=arglist@entry=0xADDR, op=op@entry=0xADDR) at eval.c:XXX -#9 0xADDR in applyClosure_core (call=call@entry=0xADDR, op=op@entry=0xADDR, arglist=0xADDR, rho=rho@entry=0xADDR, suppliedvars=, unpromise=unpromise@entry=TRUE) at eval.c:XXX -#10 0xADDR in Rf_applyClosure (call=0xADDR, op=0xADDR, arglist=, rho=0xADDR, suppliedvars=, unpromise=TRUE) at eval.c:XXX -#11 Rf_eval (e=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#12 0xADDR in do_set (call=0xADDR, op=0xADDR, args=0xADDR, rho=0xADDR) at Rinlinedfuns.h:XXX -#13 0xADDR in Rf_eval (e=e@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#14 0xADDR in Rf_ReplIteration (rho=rho@entry=0xADDR, savestack=savestack@entry=0, browselevel=browselevel@entry=0, state=state@entry=0xADDR) at main.c:XXX -#15 0xADDR in R_ReplConsole (rho=0xADDR, savestack=savestack@entry=0, browselevel=browselevel@entry=0) at main.c:XXX -#16 0xADDR in run_Rmainloop () at main.c:XXX -#17 0xADDR in Rf_mainloop () at main.c:XXX -#18 0xADDR in main (ac=, av=) at Rmain.c:XXX - -Breakpoint 2, fac (stack=0xADDR, stack@entry=, locals=0xADDR, locals@entry=) at /tmp/rcp_jit_XXXXXX/fac.S:1 -1 GETVAR_OP_ -[GDB] Hit fac (3rd call - recursive). 
Backtrace:#0 fac (stack=0xADDR, stack@entry=, locals=0xADDR, locals@entry=) at /tmp/rcp_jit_XXXXXX/fac.S:1 -#1 0xADDR in rcpNativeCaller (stack=stack@entry=0xADDR, locals=, call=) at eval.c:XXX -#2 0xADDR in rcpEval (body=body@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#3 0xADDR in Rsh_Call (stack=0xADDR, call=0xADDR, rho=0xADDR) at runtime.h:XXX -#4 0xADDR in fac (stack=0xADDR, stack@entry=, locals=0xADDR, locals@entry=) at /tmp/rcp_jit_XXXXXX/fac.S:10 -#5 0xADDR in rcpNativeCaller (stack=stack@entry=0xADDR, locals=, call=) at eval.c:XXX -#6 0xADDR in rcpEval (body=body@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#7 0xADDR in Rsh_Call (stack=0xADDR, call=0xADDR, rho=0xADDR) at runtime.h:XXX -#8 0xADDR in fac (stack=0xADDR, stack@entry=, locals=0xADDR, locals@entry=) at /tmp/rcp_jit_XXXXXX/fac.S:10 -#9 0xADDR in rcpNativeCaller (stack=stack@entry=0xADDR, locals=, call=) at eval.c:XXX -#10 0xADDR in rcpEval (body=body@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#11 0xADDR in Rf_eval (e=e@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#12 0xADDR in R_execClosure (call=call@entry=0xADDR, newrho=newrho@entry=0xADDR, sysparent=, rho=rho@entry=0xADDR, arglist=arglist@entry=0xADDR, op=op@entry=0xADDR) at eval.c:XXX -#13 0xADDR in applyClosure_core (call=call@entry=0xADDR, op=op@entry=0xADDR, arglist=0xADDR, rho=rho@entry=0xADDR, suppliedvars=, unpromise=unpromise@entry=TRUE) at eval.c:XXX -#14 0xADDR in Rf_applyClosure (call=0xADDR, op=0xADDR, arglist=, rho=0xADDR, suppliedvars=, unpromise=TRUE) at eval.c:XXX -#15 Rf_eval (e=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#16 0xADDR in do_set (call=0xADDR, op=0xADDR, args=0xADDR, rho=0xADDR) at Rinlinedfuns.h:XXX -#17 0xADDR in Rf_eval (e=e@entry=0xADDR, rho=rho@entry=0xADDR) at eval.c:XXX -#18 0xADDR in Rf_ReplIteration (rho=rho@entry=0xADDR, savestack=savestack@entry=0, browselevel=browselevel@entry=0, state=state@entry=0xADDR) at main.c:XXX -#19 0xADDR in R_ReplConsole (rho=0xADDR, 
savestack=savestack@entry=0, browselevel=browselevel@entry=0) at main.c:XXX -#20 0xADDR in run_Rmainloop () at main.c:XXX -#21 0xADDR in Rf_mainloop () at main.c:XXX -#22 0xADDR in main (ac=, av=) at Rmain.c:XXX -> stopifnot(res == 6) -> cat("Result:", res, "\n") -Result: 6 -> - -Breakpoint 1, __jit_debug_register_code () at gdb_jit.c:XXX -53 { -Function "__jit_debug_register_code" not defined. -rcp 1.0.0.0 (9b10bf4) [gdb] diff --git a/rcp/tests/gdb-jit/gdb-recursion/test.gdb b/rcp/tests/gdb-jit/gdb-recursion/test.gdb index 0e533b9..83175f2 100644 --- a/rcp/tests/gdb-jit/gdb-recursion/test.gdb +++ b/rcp/tests/gdb-jit/gdb-recursion/test.gdb @@ -14,18 +14,21 @@ break fac continue # Hit fac(3) -echo [GDB] Hit fac (1st call). Backtrace: +echo ===BT1_START===\n bt +echo ===BT1_END===\n continue # Hit fac(2) -echo [GDB] Hit fac (2nd call - recursive). Backtrace: +echo ===BT2_START===\n bt +echo ===BT2_END===\n continue # Hit fac(1) -echo [GDB] Hit fac (3rd call - recursive). Backtrace: +echo ===BT3_START===\n bt +echo ===BT3_END===\n continue quit diff --git a/rcp/tests/gdb-jit/run-gdb-tests.py b/rcp/tests/gdb-jit/run-gdb-tests.py old mode 100755 new mode 100644 index 2f44a1a..a8c1f0e --- a/rcp/tests/gdb-jit/run-gdb-tests.py +++ b/rcp/tests/gdb-jit/run-gdb-tests.py @@ -1,331 +1,83 @@ #!/usr/bin/env python3 -""" -GDB debugging test runner for RCP JIT. +"""GDB JIT test runner — validates structural properties of backtraces.""" -Combines test discovery, execution, and output validation into a single script. -Runs GDB tests in specified directories and reports results with colored output. +import os, re, subprocess, sys -Usage: - ./run-gdb-tests.py [--update] TEST_DIRS... 
- -Examples: - ./run-gdb-tests.py gdb-basic gdb-next # Run specific tests - ./run-gdb-tests.py --update gdb-basic # Update expected output -""" - -import os -import re -import shlex -import subprocess -import sys -import difflib -from pathlib import Path - -try: - from rich.console import Console - from rich.syntax import Syntax - from rich import print as rprint - RICH_AVAILABLE = True -except ImportError: - RICH_AVAILABLE = False - def rprint(*args, **kwargs): - # Strip rich markup for fallback - import re - text = " ".join(str(a) for a in args) - text = re.sub(r'\[/?[^\]]+\]', '', text) - print(text, **kwargs) - -# Patterns to ignore in output normalization -IGNORE_LINES = [ - r'^\[Thread debugging using libthread_db enabled\]$', - r'^Using host libthread_db library .*', - r"^warning: could not find '\.gnu_debugaltlink' .*", - r'^\[Detaching after vfork from child process PID\]$', - r'^warning: Error disabling address space randomization: Operation not permitted$' -] -IGNORE_PATTERNS = [re.compile(p) for p in IGNORE_LINES] - -# Normalization patterns -HEX_PATTERN = re.compile(r'0x[0-9a-fA-F]+') -JIT_PATH_PATTERN = re.compile(r'/tmp/rcp_jit_[a-zA-Z0-9]+/') -PROCESS_PATTERN = re.compile(r'process \d+') -THREAD_PATTERN = re.compile(r'Thread \d+') -C_LINE_PATTERN = re.compile(r' at (?:.*[/\\])?([^/\\]+\.[ch]):\d+') - -# Patterns indicating broken backtraces (should never appear in expected output) -BACKTRACE_BAD_PATTERNS = [ - re.compile(r'#\d+\s+0xADDR in \?\? 
\(\)'), - re.compile(r'Backtrace stopped:.*corrupt stack'), -] - -# Test timeout in seconds +R_HOME = os.environ["R_HOME"] +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) TIMEOUT = 60 - -def normalize_output(content: str) -> str: - """Normalize GDB output to allow comparison across runs.""" - lines = content.splitlines() - normalized_lines = [] - - for line in lines: - line = line.strip() - - # Normalize variable parts - line = HEX_PATTERN.sub('0xADDR', line) - line = JIT_PATH_PATTERN.sub('/tmp/rcp_jit_XXXXXX/', line) - line = PROCESS_PATTERN.sub('process PID', line) - line = THREAD_PATTERN.sub('Thread PID', line) - line = C_LINE_PATTERN.sub(r' at \1:XXX', line) - - # Skip ignored lines - if any(p.match(line) for p in IGNORE_PATTERNS): +def check_gdb_jit(): + r = subprocess.run( + [f"{R_HOME}/bin/Rscript", "-e", + "library(rcp); if(!.Call('rcp_gdb_jit_support', PACKAGE='rcp')) quit(status=1)"], + capture_output=True, timeout=30, env={**os.environ, "RCP_GDB_JIT": "1"}) + return r.returncode == 0 + +def run_gdb(test_dir): + r_bin = f"{R_HOME}/bin/exec/R" + return subprocess.run( + ["gdb", "-q", "-batch", "-x", "test.gdb", "--args", r_bin, "-q", "-f", "test.R"], + capture_output=True, timeout=TIMEOUT, cwd=test_dir, + env={**os.environ, "LD_LIBRARY_PATH": f"{R_HOME}/lib", + "R_HOME": R_HOME, "RCP_GDB_JIT": "1"} + ).stdout.decode("utf-8", errors="replace") + +def extract_bt(output, tag): + m = re.search(f"==={tag}_START===\\n(.*?)==={tag}_END===", output, re.DOTALL) + return m.group(1) if m else None + +def bt_frames(bt): + return [l for l in bt.splitlines() if re.match(r"#\d+", l)] + +def check(cond, msg, errors): + if not cond: + errors.append(msg) + +def test_gdb_recursion(output, errors): + check("Result: 6" in output, "missing 'Result: 6'", errors) + check("corrupt stack" not in output.lower(), "corrupt stack detected", errors) + for i, tag in enumerate(["BT1", "BT2", "BT3"], 1): + bt = extract_bt(output, tag) + check(bt is not None, f"{tag} section not 
found", errors) + if not bt: continue - - normalized_lines.append(line) - - return "\n".join(normalized_lines) + "\n" - - -def check_gdb_jit_support(r_home: str) -> bool: - """Check if GDB JIT support is enabled in rcp.""" - rscript = os.path.join(r_home, "bin", "Rscript") - cmd = [rscript, "-e", - "library(rcp); if(!.Call('rcp_gdb_jit_support', PACKAGE='rcp')) quit(status=1)"] - - env = os.environ.copy() - env["RCP_GDB_JIT"] = "1" - - try: - result = subprocess.run(cmd, capture_output=True, timeout=30, env=env) - return result.returncode == 0 - except (subprocess.TimeoutExpired, FileNotFoundError): - return False - - -def run_single_test(test_dir: Path, r_home: str, update_mode: bool = False) -> tuple[bool, str]: - """ - Run a single GDB test in the specified directory. - - Returns: - (success, message) tuple - """ - test_gdb = test_dir / "test.gdb" - test_r = test_dir / "test.R" - expected_file = test_dir / "expected.log" - actual_file = test_dir / "actual.log" - - # Validate test directory - if not test_gdb.exists(): - return False, f"Missing test.gdb in {test_dir}" - if not test_r.exists(): - return False, f"Missing test.R in {test_dir}" - - r_bin = os.path.join(r_home, "bin", "exec", "R") - r_lib = os.path.join(r_home, "lib") - - if not os.path.exists(r_bin): - return False, f"R binary not found at {r_bin}" - - # Build GDB command - gdb_cmd = [ - "gdb", "-q", "-batch", - "-x", str(test_gdb), - "--args", r_bin, "-q", "-f", str(test_r) - ] - - env = os.environ.copy() - env["LD_LIBRARY_PATH"] = r_lib - env["R_HOME"] = r_home - env["RCP_GDB_JIT"] = "1" - - # Run GDB - env_prefix = f"LD_LIBRARY_PATH={shlex.quote(r_lib)} R_HOME={shlex.quote(r_home)} RCP_GDB_JIT=1" - print(f"$ {env_prefix} {shlex.join(gdb_cmd)}") - try: - result = subprocess.run( - gdb_cmd, - capture_output=True, - timeout=TIMEOUT, - env=env, - cwd=test_dir - ) - output = result.stdout.decode('utf-8', errors='replace') - output += result.stderr.decode('utf-8', errors='replace') - except 
subprocess.TimeoutExpired: - return False, "GDB command timed out" - except Exception as e: - return False, f"GDB command failed: {e}" - - # Write actual output - with open(actual_file, 'w', encoding='utf-8') as f: - f.write(output) - - # Normalize output - normalized_actual = normalize_output(output) - - # Backtrace quality check - bad_lines = [] - for line in normalized_actual.splitlines(): - for pat in BACKTRACE_BAD_PATTERNS: - if pat.search(line): - bad_lines.append(line.strip()) - break - - # Update mode: write expected output and return - if update_mode: - if bad_lines: - msg = "Updated expected output, but WARNING: broken backtrace detected:\n" - for bl in bad_lines: - msg += f" {bl}\n" - with open(expected_file, 'w', encoding='utf-8') as f: - f.write(normalized_actual) - return False, msg - with open(expected_file, 'w', encoding='utf-8') as f: - f.write(normalized_actual) - return True, "Updated expected output" - - # Compare with expected output - if not expected_file.exists(): - return False, f"Missing {expected_file}. Use --update to create it." - - with open(expected_file, 'r', encoding='utf-8', errors='replace') as f: - expected_content = f.read() - - # Sanity check: warn if expected.log itself contains broken backtraces - for line in expected_content.splitlines(): - for pat in BACKTRACE_BAD_PATTERNS: - if pat.search(line): - return False, ( - f"expected.log contains broken backtrace pattern:\n" - f" {line.strip()}\n" - f"Re-record with --update after fixing the backtrace issue." 
- ) - - if normalized_actual == expected_content: - if bad_lines: - return False, ( - "Output matches expected, but backtrace is broken:\n" + - "\n".join(f" {bl}" for bl in bad_lines) - ) - return True, "Output matches expected" - else: - # Generate diff - diff = difflib.unified_diff( - expected_content.splitlines(), - normalized_actual.splitlines(), - fromfile=f"Expected ({expected_file.name})", - tofile="Actual (normalized)", - lineterm="" - ) - diff_text = "\n".join(diff) - return False, diff_text - - -def print_diff(diff_text: str): - """Print diff with syntax highlighting if available.""" - if RICH_AVAILABLE and sys.stdout.isatty(): - console = Console() - syntax = Syntax(diff_text, "diff", theme="ansi_dark", line_numbers=False) - console.print(syntax) - else: - print(diff_text) - - -def main(): - # Parse arguments manually - args = sys.argv[1:] - update_mode = "--update" in args - if update_mode: - args.remove("--update") - test_dirs_args = args - - # Get R_HOME - r_home = os.environ.get("R_HOME") - if not r_home: - rprint("[bold red]Error:[/bold red] R_HOME environment variable is not set.") - sys.exit(1) - - # Check GDB JIT support - rprint("[bold blue]Checking for GDB JIT support...[/bold blue]") - if not check_gdb_jit_support(r_home): - rprint("[yellow]Skipping debugging tests (GDB JIT support not available)[/yellow]") - sys.exit(0) - rprint("[green]GDB JIT support enabled.[/green]") - - # Determine script directory and test directories - script_dir = Path(__file__).parent.resolve() - - if not test_dirs_args: - rprint("[yellow]No test directories specified. 
Nothing to run.[/yellow]") - sys.exit(0) - - test_dirs = [Path(d) if os.path.isabs(d) else script_dir / d - for d in test_dirs_args] - - # Validate directories exist - for d in test_dirs: - if not d.exists(): - rprint(f"[bold red]Error:[/bold red] Test directory not found: {d}") - sys.exit(1) - - # Run tests - rprint() - rprint(f"[bold]Running {len(test_dirs)} test(s)...[/bold]") - rprint() - - total = 0 - passed = 0 - failed = 0 - failures = [] - - for test_dir in test_dirs: - total += 1 - test_name = test_dir.name - - rprint(f"[bold cyan]{'─' * 60}[/bold cyan]") - rprint(f"[bold]Test:[/bold] {test_name}") - - success, message = run_single_test( - test_dir, r_home, - update_mode=update_mode - ) - - if success: - passed += 1 - if update_mode: - rprint(f" [bold blue]UPDATED[/bold blue] {message}") - else: - rprint(f" [bold green]PASS[/bold green] {message}") - else: - failed += 1 - failures.append(test_name) - rprint(f" [bold red]FAIL[/bold red]") - if message and not message.startswith("---"): - rprint(f" {message}") - elif message: - # It's a diff - print_diff(message) - - # Summary - rprint() - rprint(f"[bold cyan]{'═' * 60}[/bold cyan]") - rprint(f"[bold]Summary:[/bold]") - rprint(f" Total: {total}") - rprint(f" Passed: [green]{passed}[/green]") - rprint(f" Failed: [red]{failed}[/red]") - - if failures: - rprint() - rprint("[bold red]Failed tests:[/bold red]") - for name in failures: - rprint(f" • {name}") - - rprint(f"[bold cyan]{'═' * 60}[/bold cyan]") - - if failed > 0: - sys.exit(1) + frames = bt_frames(bt) + check(not any("?? 
()" in f for f in frames), f"{tag}: unresolved frames", errors) + check(any("main" in f for f in frames[-1:]), f"{tag}: doesn't end with main", errors) + fac_count = sum(1 for f in frames if re.search(r"\bfac\b", f)) + check(fac_count == i, f"{tag}: expected {i} fac frame(s), got {fac_count}", errors) + +def test_gdb_next(output, errors): + check("corrupt stack" not in output.lower(), "corrupt stack detected", errors) + bt = extract_bt(output, "BT1") + check(bt is not None, "BT1 section not found", errors) + if bt: + frames = bt_frames(bt) + check(not any("?? ()" in f for f in frames), "BT1: unresolved frames", errors) + check(any("main" in f for f in frames[-1:]), "BT1: doesn't end with main", errors) + check(any("f_jit" in f for f in frames), "BT1: f_jit not in backtrace", errors) + for val in ["10.000000", "1.000000", "11.000000"]: + check(f"dbl: {val}" in output, f"missing 'dbl: {val}'", errors) + +TESTS = {"gdb-recursion": test_gdb_recursion, "gdb-next": test_gdb_next} + +if not check_gdb_jit(): + print("Skipping debugging tests (GDB JIT support not available)") sys.exit(0) +failed = 0 +for name in sys.argv[1:]: + test_dir = os.path.join(SCRIPT_DIR, name) + output = run_gdb(test_dir) + errors = [] + TESTS[name](output, errors) + if errors: + failed += 1 + print(f" FAIL {name}") + for e in errors: + print(f" - {e}") + else: + print(f" PASS {name}") -if __name__ == "__main__": - main() +sys.exit(1 if failed else 0) From 9509b0c27769ce20e914305dec6b884294f8767d Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Sat, 21 Mar 2026 14:10:39 +0100 Subject: [PATCH 17/17] Fix test task --- .github/workflows/benchmarks.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index e1f2b01..016f890 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -102,7 +102,7 @@ jobs: id: set-matrix shell: bash run: | - json=$(bash rcp/tests/list-tests.sh | tr ' ' 
'\n' | jq -R . | jq -sc '{"test": .}') + json=$(find rcp/tests -mindepth 1 -maxdepth 1 -type d -printf '%f\n' | sort | jq -R . | jq -sc '{"test": .}') echo "Discovered tests: $json" echo "matrix=$json" >> "$GITHUB_OUTPUT" @@ -112,7 +112,6 @@ jobs: needs: build-images strategy: fail-fast: false - max-parallel: 1 matrix: ${{ fromJson(needs.build-images.outputs.test-matrix) }} permissions: contents: read