diff --git a/R/cpp11.R b/R/cpp11.R index 471107115..0189443cb 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -16,6 +16,14 @@ leadingSpaces <- function(lines) { .Call(`_roxygen2_leadingSpaces`, lines) } +escape_rd_for_md_c <- function(text) { + .Call(`_roxygen2_escape_rd_for_md_c`, text) +} + +unescape_rd_for_md_c <- function(rd_text, tags) { + .Call(`_roxygen2_unescape_rd_for_md_c`, rd_text, tags) +} + tokenise_block <- function(lines, file, offset) { .Call(`_roxygen2_tokenise_block`, lines, file, offset) } diff --git a/R/markdown-escaping.R b/R/markdown-escaping.R index 540234f26..5924f9c67 100644 --- a/R/markdown-escaping.R +++ b/R/markdown-escaping.R @@ -4,7 +4,6 @@ #' `escape_rd_for_md()` replaces fragile Rd tags with placeholders, to avoid #' interpreting them as markdown. `unescape_rd_for_md()` puts the original #' text back in place of the placeholders after the markdown parsing is done. -#' The fragile tags are listed in `escaped_for_md`. #' #' Some Rd macros are treated specially: #' @@ -21,256 +20,38 @@ #' @rdname markdown-internals #' @keywords internal escape_rd_for_md <- function(text) { - rd_tags <- find_fragile_rd_tags(text, escaped_for_md) - protected <- protect_rd_tags(text, rd_tags) - double_escape_md(protected) + result <- escape_rd_for_md_c(text) + out <- result$text + attr(out, "roxygen-markdown-subst") <- as.character(result$tags) + out } -escaped_for_md <- paste0( - "\\", - c( - "acronym", - "code", - "command", - "CRANpkg", - "deqn", - "doi", - "dontrun", - "dontshow", - "donttest", - "email", - "env", - "eqn", - "figure", - "file", - "if", - "ifelse", - "kbd", - "link", - "linkS4class", - "method", - "mjeqn", - "mjdeqn", - "mjseqn", - "mjsdeqn", - "mjteqn", - "mjtdeqn", - "newcommand", - "option", - "out", - "packageAuthor", - "packageDescription", - "packageDESCRIPTION", - "packageIndices", - "packageMaintainer", - "packageTitle", - "pkg", - "PR", - "preformatted", - "renewcommand", - "S3method", - "S4method", - "samp", - "special", - 
"testonly", - "url", - "var", - "verb" - ) -) - #' @param rd_text The markdown parsed and interpreted text. #' @param esc_text The original escaped text from #' `escape_rd_for_md()`. #' @rdname markdown-internals unescape_rd_for_md <- function(rd_text, esc_text) { - id <- attr(esc_text, "roxygen-markdown-subst")$id - tags <- attr(esc_text, "roxygen-markdown-subst")$tags - - for (i in seq_len(nrow(tags))) { - ph <- paste0(id, "-", i, "-") - rd_text <- sub(ph, tags$text[i], rd_text, fixed = TRUE) - } - - rd_text -} - -#' Find all fragile tags (int the supplied list) in the text -#' -#' Ignore the tags that are embedded into a fragile tag. -#' -#' @param text Input text, character scalar. -#' @param fragile Character vector of fragile tags to find. -#' @return Data frame of fragile tags, with columns: -#' `tag`, `start`, `end`, `argend`, -#' `text`. -#' -#' @noRd - -find_fragile_rd_tags <- function(text, fragile) { - tags <- find_all_rd_tags(text) - ftags <- tags[tags$tag %in% fragile, ] - - ## Remove embedded ones - keep <- map_lgl(seq_len(nrow(ftags)), function(i) { - sum(ftags$start <= ftags$start[i] & ftags$argend >= ftags$argend[i]) == 1 - }) - - ftags <- ftags[keep, ] - - if (nrow(ftags)) { - ftags$text <- str_sub(text, ftags$start, ftags$argend) - } - - ftags -} - -#' Find all (complete) Rd tags in a string -#' -#' Complete means that we include the argument(s) as well. -#' -#' @param text Input text, character scalar. -#' -#' @noRd - -find_all_rd_tags <- function(text) { - text_len <- nchar(text) - - ## Find the tag names - tags <- find_all_tag_names(text) - - ## Find the end of the argument list for each tag. Note that - ## tags might be embedded into the arguments of other tags. 
- tags$argend <- map_int(seq_len(nrow(tags)), function(i) { - tag_plus <- str_sub(text, tags$end[i], text_len) - findEndOfTag(tag_plus, is_code = FALSE) + tags$end[i] - }) - - tags -} - -#' Find all tag names in a string -#' -#' Note that we also protect these tags within code, strings -#' and comments, for now. We'll see if this causes any -#' problems. -#' -#' @param text Input text, scalar. -#' @return Data frame, with columns: `tag`, `start`, -#' `end`. -#' -#' @noRd - -find_all_tag_names <- function(text) { - ## Find the tags without arguments first - tag_pos <- str_locate_all(text, r"(\\[a-zA-Z][a-zA-Z0-9]*)")[[1]] - - data.frame( - tag = str_sub(text, tag_pos[, "start"], tag_pos[, "end"]), - as.data.frame(tag_pos) - ) -} - -#' Replace fragile Rd tags with placeholders -#' -#' @param text The text, character scalar. -#' @param rd_tags Fragile Rd tags, in a data frame, -#' as returned by `find_fragile_rd_tags`. -#' @return Text, after the substitution. The original -#' text is added as an attribute. -#' -#' @noRd - -protect_rd_tags <- function(text, rd_tags) { - id <- make_random_string() - - text <- str_sub_same(text, rd_tags, id) - - attr(text, "roxygen-markdown-subst") <- - list(tags = rd_tags, id = id) - - text -} - -#' Replace parts of the same string -#' -#' It assumes that the intervals to be replaced do not -#' overlap. Gives an error otherwise. -#' -#' @param str String scalar. -#' @param repl Data frame with columns: `start`, `end`, -#' `argend`, `text`. -#' @param id Placeholder string. -#' @return Input string with the replacements performed. -#' Note that all replacements are performed in parallel, -#' at least conceptually. 
-#' -#' @noRd - -str_sub_same <- function(str, repl, id) { - repl <- repl[order(repl$start), ] - - if (is.unsorted(repl$end) || is.unsorted(repl$argend)) { - cli::cli_abort("Replacement intervals must not overlap.", .internal = TRUE) - } - - for (i in seq_len(nrow(repl))) { - ## The trailing - is needed, to distinguish between -1 and -10 - new_text <- paste0(id, "-", i, "-") - str_sub(str, repl$start[i], repl$argend[i]) <- new_text - - ## Need to shift other coordinates (we shift everything, - ## it is just simpler). - inc <- nchar(new_text) - (repl$argend[i] - repl$start[i] + 1) - repl$start <- repl$start + inc - repl$end <- repl$end + inc - repl$argend <- repl$argend + inc - } - - str -} - -#' Make a random string -#' -#' We use this as the placeholder, to make sure that the -#' placeholder does not appear in the text. -#' -#' @return String scalar -#' -#' @noRd - -make_random_string <- function(length = 32) { - paste( - sample(c(LETTERS, letters, 0:9), length, replace = TRUE), - collapse = "" - ) + unescape_rd_for_md_c(rd_text, attr(esc_text, "roxygen-markdown-subst")) } #' Check markdown escaping #' +#' @description #' This is a regression test for Markdown escaping. #' -#' @details #' Each of the following bullets should look the same when rendered: #' #' * Backticks: `\`, `\%`, `\$`, `\_` #' * `\verb{}`: \verb{\\}, \verb{\\%}, \verb{\$}, \verb{\_} #' #' \[ this isn't a link \] +#' #' \\[ neither is this \\] #' -#' @param text Input text. -#' @return Double-escaped text. 
+#' @name double_escape_md #' @keywords internal #' @examples #' "%" # percent #' "\"" # double quote #' '\'' # single quote -double_escape_md <- function(text) { - text <- gsub(r"(\)", r"(\\)", text, fixed = TRUE) - - # De-dup escaping used to avoid [] creating a link - text <- gsub(r"(\\[)", r"(\[)", text, fixed = TRUE) - text <- gsub(r"(\\])", r"(\])", text, fixed = TRUE) - text -} +NULL diff --git a/man/double_escape_md.Rd b/man/double_escape_md.Rd index 22627fbaf..6066d6d25 100644 --- a/man/double_escape_md.Rd +++ b/man/double_escape_md.Rd @@ -3,19 +3,9 @@ \name{double_escape_md} \alias{double_escape_md} \title{Check markdown escaping} -\usage{ -double_escape_md(text) -} -\arguments{ -\item{text}{Input text.} -} -\value{ -Double-escaped text. -} \description{ This is a regression test for Markdown escaping. -} -\details{ + Each of the following bullets should look the same when rendered: \itemize{ \item Backticks: \verb{\\}, \verb{\\\%}, \verb{\\$}, \verb{\\_} @@ -23,6 +13,7 @@ Each of the following bullets should look the same when rendered: } [ this isn't a link ] + \[ neither is this \] } \examples{ diff --git a/man/markdown-internals.Rd b/man/markdown-internals.Rd index 22e5234d5..f4ca4fd0b 100644 --- a/man/markdown-internals.Rd +++ b/man/markdown-internals.Rd @@ -30,7 +30,6 @@ original text is added as an attribute for each placeholder. \code{escape_rd_for_md()} replaces fragile Rd tags with placeholders, to avoid interpreting them as markdown. \code{unescape_rd_for_md()} puts the original text back in place of the placeholders after the markdown parsing is done. -The fragile tags are listed in \code{escaped_for_md}. 
Some Rd macros are treated specially: \itemize{ diff --git a/src/cpp11.cpp b/src/cpp11.cpp index f51872138..cd6f36286 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -33,6 +33,20 @@ extern "C" SEXP _roxygen2_leadingSpaces(SEXP lines) { return cpp11::as_sexp(leadingSpaces(cpp11::as_cpp>(lines))); END_CPP11 } +// markdown-escaping.cpp +cpp11::list escape_rd_for_md_c(std::string text); +extern "C" SEXP _roxygen2_escape_rd_for_md_c(SEXP text) { + BEGIN_CPP11 + return cpp11::as_sexp(escape_rd_for_md_c(cpp11::as_cpp>(text))); + END_CPP11 +} +// markdown-escaping.cpp +std::string unescape_rd_for_md_c(std::string rd_text, cpp11::strings tags); +extern "C" SEXP _roxygen2_unescape_rd_for_md_c(SEXP rd_text, SEXP tags) { + BEGIN_CPP11 + return cpp11::as_sexp(unescape_rd_for_md_c(cpp11::as_cpp>(rd_text), cpp11::as_cpp>(tags))); + END_CPP11 +} // parser2.cpp cpp11::list tokenise_block(cpp11::strings lines, std::string file, int offset); extern "C" SEXP _roxygen2_tokenise_block(SEXP lines, SEXP file, SEXP offset) { @@ -57,13 +71,15 @@ extern "C" SEXP _roxygen2_wrapUsage(SEXP string, SEXP width, SEXP indent) { extern "C" { static const R_CallMethodDef CallEntries[] = { - {"_roxygen2_escapeExamples", (DL_FUNC) &_roxygen2_escapeExamples, 1}, - {"_roxygen2_findEndOfTag", (DL_FUNC) &_roxygen2_findEndOfTag, 2}, - {"_roxygen2_find_includes", (DL_FUNC) &_roxygen2_find_includes, 1}, - {"_roxygen2_leadingSpaces", (DL_FUNC) &_roxygen2_leadingSpaces, 1}, - {"_roxygen2_rdComplete", (DL_FUNC) &_roxygen2_rdComplete, 2}, - {"_roxygen2_tokenise_block", (DL_FUNC) &_roxygen2_tokenise_block, 3}, - {"_roxygen2_wrapUsage", (DL_FUNC) &_roxygen2_wrapUsage, 3}, + {"_roxygen2_escapeExamples", (DL_FUNC) &_roxygen2_escapeExamples, 1}, + {"_roxygen2_escape_rd_for_md_c", (DL_FUNC) &_roxygen2_escape_rd_for_md_c, 1}, + {"_roxygen2_findEndOfTag", (DL_FUNC) &_roxygen2_findEndOfTag, 2}, + {"_roxygen2_find_includes", (DL_FUNC) &_roxygen2_find_includes, 1}, + {"_roxygen2_leadingSpaces", (DL_FUNC) 
&_roxygen2_leadingSpaces, 1}, + {"_roxygen2_rdComplete", (DL_FUNC) &_roxygen2_rdComplete, 2}, + {"_roxygen2_tokenise_block", (DL_FUNC) &_roxygen2_tokenise_block, 3}, + {"_roxygen2_unescape_rd_for_md_c", (DL_FUNC) &_roxygen2_unescape_rd_for_md_c, 2}, + {"_roxygen2_wrapUsage", (DL_FUNC) &_roxygen2_wrapUsage, 3}, {NULL, NULL, 0} }; } diff --git a/src/markdown-escaping.cpp b/src/markdown-escaping.cpp new file mode 100644 index 000000000..2e266943e --- /dev/null +++ b/src/markdown-escaping.cpp @@ -0,0 +1,217 @@ +#include +#include +#include +#include +#include +#include + +// Rd tags that are "fragile" and must be protected from markdown parsing. +// If you update this list, also update the hardcoded list in vignettes/rd-formatting.Rmd. +// clang-format off +static const std::unordered_set fragile_tags = { + "\\acronym", "\\code", "\\command", "\\CRANpkg", + "\\deqn", "\\doi", "\\dontrun", "\\dontshow", "\\donttest", + "\\email", "\\env", "\\eqn", + "\\figure", "\\file", + "\\if", "\\ifelse", + "\\kbd", + "\\link", "\\linkS4class", + "\\method", "\\mjeqn", "\\mjdeqn", "\\mjseqn", "\\mjsdeqn", + "\\mjteqn", "\\mjtdeqn", + "\\newcommand", + "\\option", "\\out", + "\\packageAuthor", "\\packageDescription", "\\packageDESCRIPTION", + "\\packageIndices", "\\packageMaintainer", "\\packageTitle", + "\\pkg", "\\PR", "\\preformatted", + "\\renewcommand", + "\\S3method", "\\S4method", "\\samp", "\\special", + "\\testonly", + "\\url", + "\\var", "\\verb" +}; +// clang-format on + +static const std::string placeholder_id = "ROXYGEN-PLACEHOLDER"; + +// Double-escape a backslash into the output buffer, except \[ and \] +static void double_escape_char(std::string& out, const std::string& text, int& i) { + int n = text.length(); + if (i + 1 < n && (text[i + 1] == '[' || text[i + 1] == ']')) { + out += '\\'; + out += text[i + 1]; + i++; + } else { + out += '\\'; + out += '\\'; + } +} + +// Single-pass escape: scans text once, double-escaping normal text and +// replacing top-level 
fragile Rd tags with placeholders. +[[cpp11::register]] +cpp11::list escape_rd_for_md_c(std::string text) { + using namespace cpp11; + + int n = text.length(); + std::string output; + output.reserve(n + n / 4); + std::vector captures; + + // State for capturing inside a fragile tag + // - RD: normal Rd text inside a tag + // - RD_ESCAPE: just saw \ inside a tag (skip one char for brace counting) + // - RD_COMMENT: inside a % comment (braces not counted) + enum class Sub { RD, RD_ESCAPE, RD_COMMENT }; + bool capturing = false; + std::string capture_buf; + int braces = 0; + Sub sub = Sub::RD; + + int i = 0; + while (i < n) { + if (!capturing) { + // NORMAL mode: double-escape text, detect fragile tags + if (text[i] == '\\' && i + 1 < n && std::isalpha(text[i + 1])) { + // Read tag name + int tag_start = i; + int j = i + 1; + while (j < n && std::isalnum(text[j])) { + j++; + } + std::string tag_name = text.substr(tag_start, j - tag_start); + + if (fragile_tags.count(tag_name)) { + // Start capturing this fragile tag + capturing = true; + braces = 0; + sub = Sub::RD; + capture_buf = tag_name; + i = j; + + // Check if the tag has arguments (next char must be '{') + if (i >= n || text[i] != '{') { + // Tag is complete with no arguments + captures.push_back(capture_buf); + output += placeholder_id + "-" + std::to_string(captures.size()) + "-"; + capturing = false; + } + continue; + } else { + // Not fragile: double-escape the backslash, output tag name + double_escape_char(output, text, i); + i++; + // Output the rest of the tag name (alpha chars after the \) + while (i < n && std::isalnum(text[i])) { + output += text[i]; + i++; + } + continue; + } + } else if (text[i] == '\\') { + double_escape_char(output, text, i); + i++; + continue; + } else { + output += text[i]; + i++; + continue; + } + } else { + // CAPTURE mode: accumulate text, track Rd state machine + capture_buf += text[i]; + + switch (sub) { + case Sub::RD: + if (text[i] == '{') { + braces++; + } else if 
(text[i] == '}') { + braces--; + } else if (text[i] == '\\') { + sub = Sub::RD_ESCAPE; + } else if (text[i] == '%') { + sub = Sub::RD_COMMENT; + } + break; + case Sub::RD_ESCAPE: + sub = Sub::RD; + break; + case Sub::RD_COMMENT: + if (text[i] == '\n') { + sub = Sub::RD; + } + break; + } + + // Check if the tag is complete + bool complete = braces == 0 && (sub == Sub::RD || sub == Sub::RD_COMMENT); + if (complete && (i + 1 >= n || text[i + 1] != '{')) { + captures.push_back(capture_buf); + output += placeholder_id + "-" + std::to_string(captures.size()) + "-"; + capturing = false; + } + + i++; + } + } + + // If we were still capturing at end of string (incomplete tag), flush as-is + if (capturing) { + // Double-escape the capture buffer since it wasn't a complete tag + for (int k = 0; k < (int)capture_buf.length(); k++) { + if (capture_buf[k] == '\\') { + double_escape_char(output, capture_buf, k); + } else { + output += capture_buf[k]; + } + } + } + + writable::strings tag_texts(captures.size()); + for (size_t j = 0; j < captures.size(); j++) { + tag_texts[j] = captures[j]; + } + + writable::list result({ + "text"_nm = output, + "tags"_nm = tag_texts + }); + return result; +} + +[[cpp11::register]] +std::string unescape_rd_for_md_c(std::string rd_text, cpp11::strings tags) { + if (tags.size() == 0) { + return rd_text; + } + + int id_len = placeholder_id.length(); + int n = rd_text.length(); + std::string output; + output.reserve(n); + + int i = 0; + while (i < n) { + // Check if we're at the start of a placeholder + if (rd_text[i] == placeholder_id[0] && i + id_len < n && + rd_text.compare(i, id_len, placeholder_id) == 0 && + rd_text[i + id_len] == '-') { + // Parse the number between the two dashes: ROXYGEN-PLACEHOLDER-NUMBER- + int j = i + id_len + 1; + int num = 0; + while (j < n && rd_text[j] >= '0' && rd_text[j] <= '9') { + num = num * 10 + (rd_text[j] - '0'); + j++; + } + // Check for trailing dash and valid tag index + if (j < n && rd_text[j] == '-' && num 
>= 1 && num <= tags.size()) { + output += std::string(tags[num - 1]); + i = j + 1; + continue; + } + } + output += rd_text[i]; + i++; + } + + return output; +} diff --git a/tests/testthat/test-markdown-escaping.R b/tests/testthat/test-markdown-escaping.R new file mode 100644 index 000000000..8e845c7b6 --- /dev/null +++ b/tests/testthat/test-markdown-escaping.R @@ -0,0 +1,104 @@ +test_that("empty string passes through", { + result <- escape_rd_for_md_c("") + expect_equal(result$text, "") + expect_equal(as.character(result$tags), character()) +}) + +test_that("plain text is only double-escaped", { + result <- escape_rd_for_md_c("plain text") + expect_equal(result$text, "plain text") + expect_equal(as.character(result$tags), character()) +}) + +test_that("backslashes are doubled", { + result <- escape_rd_for_md_c(r"(a \ b)") + expect_equal(result$text, r"(a \\ b)") +}) + +test_that("\\[ and \\] are not doubled", { + result <- escape_rd_for_md_c(r"(\[ not a link \])") + expect_equal(result$text, r"(\[ not a link \])") +}) + +test_that("non-fragile tags are double-escaped, not captured", { + result <- escape_rd_for_md_c(r"(\emph{text})") + expect_equal(result$text, r"(\\emph{text})") + expect_equal(as.character(result$tags), character()) +}) + +test_that("fragile tag with one argument", { + result <- escape_rd_for_md_c(r"(\code{foo()})") + expect_match(result$text, "ROXYGEN-PLACEHOLDER-1-") + expect_equal(as.character(result$tags), r"(\code{foo()})") +}) + +test_that("fragile tag with no arguments", { + result <- escape_rd_for_md_c(r"(\url rest)") + expect_equal(as.character(result$tags), r"(\url)") + expect_match(result$text, "rest") +}) + +test_that("fragile tag with multiple argument groups", { + # \link{fun} captures the {fun} argument + result <- escape_rd_for_md_c(r"(\link{fun})") + expect_equal(as.character(result$tags), r"(\link{fun})") + + # \link[pkg]{fun} — the [ stops argument scanning, so only \link is captured + result2 <- 
escape_rd_for_md_c(r"(\link[pkg]{fun})") + expect_equal(as.character(result2$tags), r"(\link)") +}) + +test_that("multiple fragile tags", { + result <- escape_rd_for_md_c(r"(See \code{foo()} and \link{bar})") + expect_equal(length(result$tags), 2) + expect_equal(as.character(result$tags), c(r"(\code{foo()})", r"(\link{bar})")) +}) + +test_that("nested fragile tags captured as one", { + result <- escape_rd_for_md_c(r"(\code{a \link{b} c})") + expect_equal(length(result$tags), 1) + expect_equal(as.character(result$tags), r"(\code{a \link{b} c})") +}) + +test_that("adjacent fragile tags", { + result <- escape_rd_for_md_c(r"(\code{a}\link{b})") + expect_equal(length(result$tags), 2) + expect_equal(as.character(result$tags), c(r"(\code{a})", r"(\link{b})")) +}) + +test_that("Rd escapes inside tags", { + result <- escape_rd_for_md_c(r"(\code{a \{ b})") + expect_equal(as.character(result$tags), r"(\code{a \{ b})") +}) + +test_that("% comments inside tags", { + result <- escape_rd_for_md_c("\\code{a % comment\nb}") + expect_equal(as.character(result$tags), "\\code{a % comment\nb}") +}) + +test_that("text around fragile tags is double-escaped", { + result <- escape_rd_for_md_c(r"(a \ \code{x} \ b)") + expect_equal(as.character(result$tags), r"(\code{x})") + # The backslashes outside the tag should be doubled + expect_true(grepl("a \\\\ ", result$text, fixed = TRUE)) + expect_true(grepl(" \\\\ b", result$text, fixed = TRUE)) +}) + +test_that("round-trip escape/unescape preserves Rd tags", { + cases <- c( + r"(See \code{foo()} and \link{bar} for details.)", + r"(\code{a \link{b} c})", + "" + ) + for (text in cases) { + escaped <- escape_rd_for_md(text) + result <- unescape_rd_for_md(escaped, escaped) + expect_equal(result, text, info = text) + } +}) + +test_that("unescape_rd_for_md_c restores placeholders", { + rd_text <- "See ROXYGEN-PLACEHOLDER-1- for details" + result <- unescape_rd_for_md_c(rd_text, r"(\code{foo()})") + expect_equal(result, r"(See \code{foo()} for 
details)") +}) diff --git a/tests/testthat/test-rd-markdown-escaping.R b/tests/testthat/test-rd-markdown-escaping.R deleted file mode 100644 index 6f93e078a..000000000 --- a/tests/testthat/test-rd-markdown-escaping.R +++ /dev/null @@ -1,122 +0,0 @@ -tag_df <- function(tag, start, end, argend = NULL) { - df <- data.frame( - tag = tag, - start = start, - end = end - ) - if (!is.null(argend)) { - df$argend <- argend - } - df -} - -test_that("find_all_tag_names", { - text <- r"(blah blah \mytag blah blah)" - expect_equal( - find_all_tag_names(text), - tag_df(r"(\mytag)", 11, 16) - ) -}) - -test_that("find_all_rd_tags", { - cases <- list( - ## No tags - list("", character(), numeric(), numeric(), numeric()), - list("nothing to see here", character(), numeric(), numeric(), numeric()), - list("\nstill\nnothing\n", character(), numeric(), numeric(), numeric()), - - ## One tag - list(r"(blah blah \mytag blah blah)", r"(\mytag)", 11, 16, 16), - list(r"(blah blah \mytag{arg1} blah blah)", r"(\mytag)", 11, 16, 22), - list(r"(blah blah \mytag{arg1}{arg2} blah blah)", r"(\mytag)", 11, 16, 28), - list(r"(blah\mytag)", r"(\mytag)", 5, 10, 10), - list(r"(blah \mytag)", r"(\mytag)", 6, 11, 11), - list(r"(blah\mytag{arg})", r"(\mytag)", 5, 10, 15), - list(r"(\mytag hoohoo)", r"(\mytag)", 1, 6, 6), - list(r"(\mytag)", r"(\mytag)", 1, 6, 6), - list(r"(\mytag{arg})", r"(\mytag)", 1, 6, 11), - list("blah \\mytag\nblah blah", r"(\mytag)", 6, 11, 11), - - ## Multiple tags - list( - r"(blah \tag1 \tag2{arg} blah)", - c(r"(\tag1)", r"(\tag2)"), - c(6, 12), - c(10, 16), - c(10, 21) - ), - list( - r"(blah \tag1{ \tag2{arg} } blah)", - c(r"(\tag1)", r"(\tag2)"), - c(6, 13), - c(10, 17), - c(24, 22) - ), - list( - "blah \\tag1{\n\\tag2{arg}\n} blah", - c(r"(\tag1)", r"(\tag2)"), - c(6, 13), - c(10, 17), - c(24, 22) - ) - ) - - for (case in cases) { - expect_equal( - find_all_rd_tags(case[[1]]), - do.call(tag_df, case[-1]), - info = case[[1]] - ) - } -}) - -test_that("find_fragile_rd_tags", { - 
fragile <- c(r"(\frag)", r"(\frag1)", r"(\frag2)") - - cases <- list( - list(r"(This is \frag{here}, \this{arg} not)", r"(\frag)"), - list(r"(Embedded \frag{ into \frag1{arg} plus })", r"(\frag)"), - list( - r"(blah \cmd{ \frag{arg} \frag{arg} } \frag2 blah)", - c(r"(\frag)", r"(\frag)", r"(\frag2)") - ) - ) - - for (case in cases) { - expect_equal( - find_fragile_rd_tags(case[[1]], fragile)$tag, - case[[2]], - info = case[[1]] - ) - } -}) - - -test_that("str_sub_same", { - expect_equal( - str_sub_same( - "123456789ab", - data.frame(start = c(1, 6), end = c(2, 10), argend = c(2, 10)), - "xxx" - ), - "xxx-1-345xxx-2-b" - ) - - expect_equal( - str_sub_same( - "123456789ab", - data.frame(start = c(1, 8), end = c(7, 10), argend = c(7, 10)), - "xxx" - ), - "xxx-1-xxx-2-b" - ) - - expect_equal( - str_sub_same( - "123456789ab", - data.frame(start = numeric(), end = numeric(), argend = numeric()), - "xxx" - ), - "123456789ab" - ) -}) diff --git a/vignettes/rd-formatting.Rmd b/vignettes/rd-formatting.Rmd index 2179aa62a..56b112a92 100644 --- a/vignettes/rd-formatting.Rmd +++ b/vignettes/rd-formatting.Rmd @@ -305,7 +305,7 @@ If you want to avoid this restriction, set the `restrict_image_formats` roxygen2 ### Some Rd tags can't contain markdown -When mixing `Rd` and Markdown notation, most `Rd` tags may contain Markdown markup, the ones that can *not* are: `r paste0("\x60", roxygen2:::escaped_for_md, "\x60", collapse = ", ")`. 
+When mixing `Rd` and Markdown notation, most `Rd` tags may contain Markdown markup; the ones that can *not* are: `\acronym`, `\code`, `\command`, `\CRANpkg`, `\deqn`, `\doi`, `\dontrun`, `\dontshow`, `\donttest`, `\email`, `\env`, `\eqn`, `\figure`, `\file`, `\if`, `\ifelse`, `\kbd`, `\link`, `\linkS4class`, `\method`, `\mjeqn`, `\mjdeqn`, `\mjseqn`, `\mjsdeqn`, `\mjteqn`, `\mjtdeqn`, `\newcommand`, `\option`, `\out`, `\packageAuthor`, `\packageDescription`, `\packageDESCRIPTION`, `\packageIndices`, `\packageMaintainer`, `\packageTitle`, `\pkg`, `\PR`, `\preformatted`, `\renewcommand`, `\S3method`, `\S4method`, `\samp`, `\special`, `\testonly`, `\url`, `\var`, `\verb`. ### Mixing Markdown and `Rd` markup