Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions src/elements/drawer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ use nom::{
IResult,
};

use crate::parse::combinators::{blank_lines_count, eol, lines_till};
use crate::{
parse::combinators::{blank_lines_count, eol, lines_till},
parsers::lines_until_headline_at_level_le,
};

/// Drawer Element
#[derive(Debug, Default, Clone)]
Expand Down Expand Up @@ -59,7 +62,18 @@ pub fn parse_drawer_without_blank(input: &str) -> IResult<&str, (Drawer, &str),
tag(":"),
)(input)?;
let (input, _) = eol(input)?;
let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case(":END:"))(input)?;

// Restrict the search for the end of the drawer to the current headline.
let (_input_after_headline, (input_until_headline, _level)) =
lines_until_headline_at_level_le(input, std::usize::MAX)?;

// tail is the remaining not used for the drawer out of
// input_until_headline.
let (tail, contents) =
lines_till(|line| line.trim().eq_ignore_ascii_case(":END:"))(input_until_headline)?;

// Skip over the amount used by the drawer.
let input = &input[input_until_headline.len() - tail.len()..];

Ok((
input,
Expand Down Expand Up @@ -118,4 +132,30 @@ fn parse() {

// https://github.com/PoiScript/orgize/issues/9
assert!(parse_drawer(":SPAGHETTI:\n").is_err());

// https://github.com/PoiScript/orgize/issues/24
// A drawer may not contain a headline.
assert!(parse_drawer(
r#":MYDRAWER:
* Node
:END:"#
)
.is_err(),);

// A drawer may not contain another drawer. An attempt to do so will result
// in the drawer ending at the first end line.
assert_eq!(
parse_drawer(":OUTER:\nOuter Text\n:INNER:\nInner Text\n:END:\n:END:"),
Ok((
":END:",
(
Drawer {
name: "OUTER".into(),
pre_blank: 0,
post_blank: 0
},
"Outer Text\n:INNER:\nInner Text\n"
)
))
);
}
2 changes: 1 addition & 1 deletion src/elements/title.rs
Original file line number Diff line number Diff line change
Expand Up @@ -453,5 +453,5 @@ fn parse_properties_drawer_() {
.into_iter()
.collect::<HashMap<_, _>>()
))
)
);
}
155 changes: 140 additions & 15 deletions src/parsers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@ use std::marker::PhantomData;
use indextree::{Arena, NodeId};
use jetscii::{bytes, BytesConst};
use memchr::{memchr, memchr_iter};
use nom::bytes::complete::take_while1;
use nom::{
bytes::complete::is_a,
character::complete::one_of,
combinator::{map, verify},
IResult,
};

use crate::config::ParseConfig;
use crate::elements::{
Expand Down Expand Up @@ -635,23 +640,143 @@ pub fn blank_lines_count(input: &str) -> (&str, usize) {
crate::parse::combinators::blank_lines_count(input).unwrap_or((input, 0))
}

// Parses a headline together with everything nested beneath it.
//
// Reads the leading stars via `parse_headline_level`, then keeps consuming
// lines for as long as each line is either not a headline at all or a headline
// of a strictly greater level. Returns `None` when `parse_headline_level`
// rejects the input; otherwise `Some((rest, (headline_text, level)))`.
//
// NOTE(review): this listing also contains a later `parse_headline` built on
// `lines_until_headline_at_level_le`; this one looks like the pre-change side
// of a diff — confirm which definition is live before relying on it.
pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> {
let (input_, level) = parse_headline_level(input)?;
// A line belongs to this headline's subtree unless it is itself a headline
// whose level is <= `level`.
let (input_, content) = lines_while(move |line| {
parse_headline_level(line)
.map(|(_, l)| l > level)
.unwrap_or(true)
})(input_)
// If `lines_while` reports an error, treat the subtree as empty.
.unwrap_or((input_, ""));
// `content` starts right after the stars, so the stars (`level` bytes,
// presumably — `parse_headline_level` is only partly visible here) plus
// `content` span the whole headline text.
Some((input_, (&input[0..level + content.len()], level)))
}
/// Matches a single headline line whose level is at most `max_level`.
///
/// The match always covers exactly one line, including its trailing `\n` when
/// there is one. Unlike org-mode (but like org-element), the run of stars may
/// be terminated by `'\n'` or by end of input as well as by a space.
///
/// Returns the remaining input after the line and the number of stars. Must
/// only be called at the start of a line.
pub(crate) fn parse_headline_level_le(input: &str, max_level: usize) -> IResult<&str, usize, ()> {
    let (after_stars, level) = verify(
        map(is_a("*"), |stars: &str| stars.chars().count()),
        |stars: &usize| *stars <= max_level,
    )(input)?;

    // Stars running up to end of input form a complete (empty) headline.
    if after_stars.is_empty() {
        return Ok((after_stars, level));
    }

    // Only a space or a newline may follow the stars; the matched character is
    // not consumed here because `line_length` skips the whole rest of the line.
    one_of("\n ")(after_stars)?;
    line_length(after_stars).map(|(after_line, _)| (after_line, level))
}

pub fn parse_headline_level(input: &str) -> Option<(&str, usize)> {
let (input, stars) = take_while1::<_, _, ()>(|c: char| c == '*')(input).ok()?;
/// Consumes the current line, up to and including its `\n` if present, or up
/// to end of input otherwise.
///
/// Returns the input following the line and the number of bytes consumed. A
/// `\r` preceding the `\n` is simply part of the counted line.
fn line_length(input: &str) -> IResult<&str, usize, ()> {
    // Without a newline the whole remaining input is the line.
    let consumed = memchr(b'\n', input.as_bytes()).map_or(input.len(), |newline| newline + 1);
    Ok((&input[consumed..], consumed))
}

/// Returns all text up to (but not including) the first line that is a
/// headline with level <= `max_level`. Must be called at the start of a line.
///
/// The result is `(rest, (text, max_level))`: `text` is the prefix of `input`
/// before the headline (possibly empty when `input` starts at a headline) and
/// `rest` begins at that headline, or is empty if none was found. Note that
/// the `usize` handed back is the `max_level` argument itself, not the level
/// of the headline that stopped the scan.
///
/// This is a separate function from lines_while/lines_until because we need to
/// treat EOF differently from EOL when the file ends with \r.
pub fn lines_until_headline_at_level_le(
input: &str,
max_level: usize,
) -> IResult<&str, (&str, usize), ()> {
// Collect lines until EOF or a headline. `last` is the byte offset of the
// start of the line currently being examined; each newline found ends it.
let mut last = 0;
for i in memchr_iter(b'\n', input.as_bytes()) {
// Cheap pre-check: only lines starting with '*' can be headlines, so the
// full headline parser is skipped for every other line.
if input.as_bytes()[last] == b'*'
&& parse_headline_level_le(&input[last..], max_level).is_ok()
{
break;
}

// NOTE(review): the next two lines reference `stars`, which is not defined
// in this function; they look like removed-side diff lines of the old
// `parse_headline_level` interleaved into this listing — confirm and drop.
if input.starts_with(' ') || input.starts_with('\n') || input.is_empty() {
Some((input, stars.len()))
// Not a headline: advance to the first byte after this newline.
last = i + 1;
}

// `last` now points at a headline line, at a final line with no trailing
// newline, or at end of input. If a final unterminated line is left and it is
// not a headline, the entire input is plain text.
if last < input.len() && parse_headline_level_le(&input[last..], max_level).is_err() {
Ok(("", (input, max_level)))
} else {
// NOTE(review): the bare `None` below also appears to be removed-side diff
// residue (the function returns an `IResult`, not an `Option`) — confirm.
None
// Either a headline starts at `last` or the input is exhausted: split there.
Ok((&input[last..], (&input[..last], max_level)))
}
}

/// Parses a headline and the whole section beneath it.
///
/// The section extends until the next headline whose level is less than or
/// equal to this headline's level, or to end of input. Returns `None` when
/// `input` does not start with a headline; otherwise
/// `Some((rest, (headline_and_section, level)))`.
pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> {
    // Consume the headline line itself; any star count is accepted.
    let (after_title, level) = parse_headline_level_le(input, std::usize::MAX).ok()?;
    // Then everything up to the next sibling-or-shallower headline.
    let (remaining, _section) = lines_until_headline_at_level_le(after_title, level).ok()?;
    let (headline, rest) = input.split_at(input.len() - remaining.len());
    Some((rest, (headline, level)))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_headline() {
        // Inputs whose first line is not a headline. Only ' ', '\n' or end of
        // input may terminate the stars, so "*\r\n" and "*\t" are rejected,
        // which ensures every line either is or is not a headline.
        let rejected = [
            "",
            "\n",
            "Hello",
            "Hello\n",
            "Hello\r",
            "Hello\n\r",
            "Hello\r\n",
            "Hello\n*",
            "Hello\n\n*",
            "Hello\r\n*",
            "Hello\n\r\n*",
            "Hello\r\n\n*",
            "*\t",
            "*\t\n",
            "*\r\n",
            "*\r",
            "***\r** Hello\n",
            "*\r\n* \n",
        ];
        for input in rejected.iter() {
            assert_eq!(parse_headline(input), None, "input: {:?}", input);
        }

        // (input, remaining input, headline with its section, level)
        let accepted = [
            ("*", "", "*", 1),
            ("*\n", "", "*\n", 1),
            ("*\n\r", "", "*\n\r", 1),
            ("* ", "", "* ", 1),
            ("* \r", "", "* \r", 1),
            ("* \n", "", "* \n", 1),
            ("* \n\r*", "", "* \n\r*", 1),
            ("* \n\r**", "", "* \n\r**", 1),
            ("*\n*", "*", "*\n", 1),
            ("*\n\n*", "*", "*\n\n", 1),
            ("* *", "", "* *", 1),
            ("*** ** Hello\n", "", "*** ** Hello\n", 3),
            ("* Hello", "", "* Hello", 1),
            ("*** Hi\nWorld", "", "*** Hi\nWorld", 3),
            (
                "* Hello\nText\n** Test\n ** More text\n* World\n",
                "* World\n",
                "* Hello\nText\n** Test\n ** More text\n",
                1,
            ),
            // A section may contain "*\r\n" / "**\r\n"; such lines are plain
            // text rather than headlines.
            ("* \n*\r\n* \n", "* \n", "* \n*\r\n", 1),
            ("* \n**\r\n* \n", "* \n", "* \n**\r\n", 1),
            ("* a\n*", "*", "* a\n", 1),
            ("* a\r\n*", "*", "* a\r\n", 1),
            ("* a\r\n* b", "* b", "* a\r\n", 1),
            ("* a\n* ", "* ", "* a\n", 1),
            ("* a\n* \n", "* \n", "* a\n", 1),
        ];
        for &(input, rest, headline, level) in accepted.iter() {
            assert_eq!(
                parse_headline(input),
                Some((rest, (headline, level))),
                "input: {:?}",
                input
            );
        }
    }
}
18 changes: 18 additions & 0 deletions tests/issue_24.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
use orgize::Org;

#[test]
fn headline_in_drawer() {
    // https://github.com/PoiScript/orgize/issues/24
    // A drawer may not contain a headline: the second starred line must start
    // a new headline instead of being swallowed by the unterminated drawer.
    const DEEPEST_LEVEL: usize = 3;
    for outer in 1..=DEEPEST_LEVEL {
        for inner in 1..=DEEPEST_LEVEL {
            let text = format!(
                "{} Hello\n:PROPERTIES:\n{} World\n:END:",
                "*".repeat(outer),
                "*".repeat(inner)
            );
            let org = Org::parse_string(text);
            assert_eq!(org.headlines().count(), 2);
        }
    }
}