Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 17 additions & 12 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ fn node_text(handle: &Handle) -> String {
out.trim().to_string()
}

/// Reports whether `tag` names an element that text collection skips.
///
/// The comparison ignores ASCII case, so `"SCRIPT"` and `"script"` are
/// treated alike. Only exact tag names match; `"scripts"` does not.
fn is_ignored_tag(tag: &str) -> bool {
    // Elements whose subtree never contributes visible text.
    const IGNORED_TAGS: [&str; 5] = ["script", "style", "noscript", "template", "head"];
    IGNORED_TAGS
        .iter()
        .any(|ignored| tag.eq_ignore_ascii_case(ignored))
}

/// Recursively appends text nodes from `handle` to `out`, tracking whether the
/// previous output was whitespace.
fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) {
Expand All @@ -47,13 +55,7 @@ fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) {
}
}
NodeData::Element { name, .. } => {
let tag = name.local.as_ref();
if tag.eq_ignore_ascii_case("script")
|| tag.eq_ignore_ascii_case("style")
|| tag.eq_ignore_ascii_case("noscript")
|| tag.eq_ignore_ascii_case("template")
|| tag.eq_ignore_ascii_case("head")
{
if is_ignored_tag(name.local.as_ref()) {
return;
}
for child in handle.children.borrow().iter() {
Expand Down Expand Up @@ -101,13 +103,16 @@ fn collect_rows(handle: &Handle, rows: &mut Vec<Handle>) {
}
}

/// Reports whether `tag` is `strong` or `b`, ignoring ASCII case.
fn is_bold_tag(tag: &str) -> bool {
    ["strong", "b"]
        .iter()
        .any(|bold| tag.eq_ignore_ascii_case(bold))
}

/// Returns `true` if `handle` is, or contains, a `<b>` or `<strong>` element.
fn contains_strong(handle: &Handle) -> bool {
if let NodeData::Element { name, .. } = &handle.data {
let tag = name.local.as_ref();
if tag.eq_ignore_ascii_case("strong") || tag.eq_ignore_ascii_case("b") {
return true;
}
if let NodeData::Element { name, .. } = &handle.data
&& is_bold_tag(name.local.as_ref())
{
return true;
}
let children = handle.children.borrow();
children.iter().any(contains_strong)
Expand Down
7 changes: 4 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ use regex::Regex;
/// let cells = split_cells(line);
/// assert_eq!(cells, vec!["cell1", "cell2", "cell3"]);
/// ```
// Reports whether the next character of `chars` is `|`, without consuming it.
// Returns `false` when the iterator is exhausted.
//
// NOTE(review): the `///` doc comment immediately above shows a `split_cells`
// example, so it appears to belong to `split_cells`; with this helper placed
// here, rustdoc attaches it to `next_is_pipe` instead. Consider moving this
// helper below `split_cells` — confirm against the full file.
fn next_is_pipe(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> bool {
    matches!(chars.peek(), Some(&'|'))
}
#[must_use]
pub fn split_cells(line: &str) -> Vec<String> {
let mut s = line.trim();
Expand All @@ -46,9 +49,7 @@ pub fn split_cells(line: &str) -> Vec<String> {
let mut chars = s.chars().peekable();
while let Some(ch) = chars.next() {
if ch == '\\' {
if let Some(&next) = chars.peek()
&& next == '|'
{
if next_is_pipe(&mut chars) {
// `\|` escapes the pipe so it becomes part of the cell
chars.next();
current.push('|');
Expand Down
6 changes: 5 additions & 1 deletion src/reflow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ pub(crate) fn detect_separator(
let mut sep_row_idx: Option<usize> = None;

let sep_invalid = invalid_separator(sep_cells.as_ref(), max_cols);
if sep_invalid && second_row_is_separator(rows) {
if should_use_second_row_as_separator(sep_invalid, rows) {
sep_cells = Some(rows[1].clone());
sep_row_idx = Some(1);
}
Expand All @@ -129,6 +129,10 @@ fn invalid_separator(sep_cells: Option<&Vec<String>>, max_cols: usize) -> bool {
}
}

/// Decides whether the second row should stand in as the separator row.
///
/// Returns `true` only when `sep_invalid` is set and
/// `second_row_is_separator(rows)` also holds; the row check is skipped
/// entirely when `sep_invalid` is `false`.
fn should_use_second_row_as_separator(sep_invalid: bool, rows: &[Vec<String>]) -> bool {
    if !sep_invalid {
        return false;
    }
    second_row_is_separator(rows)
}

/// Reports whether `rows` has a second row whose cells all match
/// `crate::SEP_RE`.
///
/// Returns `false` when there are fewer than two rows. A second row with no
/// cells counts as a match, because `all` over an empty iterator is `true`.
fn second_row_is_separator(rows: &[Vec<String>]) -> bool {
    rows.get(1)
        .is_some_and(|second| second.iter().all(|cell| crate::SEP_RE.is_match(cell)))
}