Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 6 additions & 15 deletions src/Html/HtmlOperations.fs
Original file line number Diff line number Diff line change
Expand Up @@ -234,32 +234,23 @@ module HtmlNode =
let classesToLookFor = cssClass.Split [|' '|]
classesToLookFor |> Array.forall (fun cssClass -> presentClasses |> Array.exists ((=) cssClass))

let private innerTextExcluding' recurse exclusions n =
let exclusions = "style" :: "script" :: exclusions
let isAriaHidden (n:HtmlNode) =
match tryGetAttribute "aria-hidden" n with
| Some a ->
match bool.TryParse(a.Value()) with
| true, v -> v
| false, _ -> false
| None -> false
let rec innerText' inRoot n =
let exclusions = if inRoot then ["style"; "script"] else exclusions
let private innerTextExcluding' recurse exclusions n =
let rec innerText' n =
match n with
| HtmlElement(name, _, content) when List.forall ((<>) name) exclusions && not (isAriaHidden n) ->
| HtmlElement(name, _, content) when List.forall ((<>) name) exclusions ->
seq { for e in content do
match e with
| HtmlText(text) -> yield text
| HtmlComment(_) -> yield ""
| elem ->
| elem ->
if recurse then
yield innerText' false elem
yield innerText' elem
else
yield "" }
|> String.Concat
| HtmlText(text) -> text
| _ -> ""
innerText' true n
innerText' n

/// Returns the concatenated inner text of `n`, descending into child elements.
/// `exclusions` is handed unchanged to `innerTextExcluding'`, which is called
/// with `recurse` fixed to `true`.
let innerTextExcluding exclusions n =
    n |> innerTextExcluding' true exclusions
Expand Down
34 changes: 32 additions & 2 deletions src/Html/HtmlRuntime.fs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,36 @@ module HtmlRuntime =
i <- i + 1
i

/// Concatenates the text found inside `n`, skipping excluded elements.
///
/// - `recurse`: when `false`, child elements contribute an empty string, so only
///   the direct text children of `n` are collected.
/// - `exclusions`: element names skipped when met below the starting node;
///   "style" and "script" are always added to this list.
/// - `n`: the node to start from. For the starting node itself only "style" and
///   "script" are checked; the caller-supplied names are not.
///
/// An element whose `aria-hidden` attribute parses as the boolean `true` yields
/// an empty string, as do comments and any other node kind that is not text.
let private innerTextExcluding' recurse exclusions n =
    // Names checked for every node below the starting node.
    let nestedExclusions = "style" :: "script" :: exclusions
    // Names checked for the starting node only.
    let rootExclusions = ["style"; "script"]

    // True only when the attribute exists and its value parses as `true`;
    // a missing attribute or an unparsable value counts as "not hidden".
    let isAriaHidden (node: HtmlNode) =
        match node.TryGetAttribute "aria-hidden" with
        | None -> false
        | Some attr ->
            match bool.TryParse(attr.Value()) with
            | true, hidden -> hidden
            | _ -> false

    let rec collect isRoot node =
        let excluded = if isRoot then rootExclusions else nestedExclusions
        match node with
        | HtmlText text -> text
        | HtmlElement(name, _, children)
            when not (List.exists ((=) name) excluded) && not (isAriaHidden node) ->
            children
            |> Seq.map (fun child ->
                match child with
                | HtmlText text -> text
                | HtmlComment _ -> ""
                | nested -> if recurse then collect false nested else "")
            |> String.Concat
        | _ -> ""

    collect true n

/// Inner text of `n` with descent into child elements enabled: forwards to
/// `innerTextExcluding'` with `recurse` set to `true` and `exclusions` unchanged.
let private innerTextExcluding exclusions n =
    n |> innerTextExcluding' true exclusions

let private parseTable inferenceParameters includeLayoutTables makeUnique index (table:HtmlNode, parents:HtmlNode list) =
let rowSpan cell =
max 1 (defaultArg (TextConversions.AsInteger CultureInfo.InvariantCulture cell?rowspan) 0)
Expand Down Expand Up @@ -183,7 +213,7 @@ module HtmlRuntime =
for colindex, cell in cells.[rowindex] do
let data =
let getContents contents =
contents |> List.map (HtmlNode.innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"]) |> String.Concat |> normalizeWs
contents |> List.map (innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"]) |> String.Concat |> normalizeWs
match cell with
| HtmlElement("td", _, contents) -> Cell (false, getContents contents)
| HtmlElement("th", _, contents) -> Cell (true, getContents contents)
Expand Down Expand Up @@ -244,7 +274,7 @@ module HtmlRuntime =

let rows =
list.Descendants("li", true)
|> Seq.map (HtmlNode.innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"] >> normalizeWs)
|> Seq.map (innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"] >> normalizeWs)
|> Seq.toArray

if rows.Length <= 1 then None else
Expand Down
10 changes: 0 additions & 10 deletions tests/FSharp.Data.Tests/HtmlOperations.fs
Original file line number Diff line number Diff line change
Expand Up @@ -142,13 +142,3 @@ let ``Can get direct inner text``() =
let ``Inner text on a comment should be String.Empty``() =
    // The inner text of a comment node is expected to be the empty string.
    HtmlNode.NewComment "Hello World"
    |> HtmlNode.innerText
    |> should equal String.Empty

[<Test>]
let ``Inner text on a style should be String.Empty``() =
    // A <style> element with a single text child; its inner text is expected to be empty.
    let styleElement = HtmlNode.NewElement("style", [HtmlNode.NewText "Hello World"])
    styleElement
    |> HtmlNode.innerText
    |> should equal String.Empty

[<Test>]
let ``Inner text on a script should be String.Empty``() =
    // A <script> element with a single text child; its inner text is expected to be empty.
    let scriptElement = HtmlNode.NewElement("script", [HtmlNode.NewText "Hello World"])
    scriptElement
    |> HtmlNode.innerText
    |> should equal String.Empty