@@ -58,16 +58,17 @@ class Crawler implements \Countable, \IteratorAggregate
5858 */
5959 private bool $ isHtml = true ;
6060
61- private HTML5 $ html5Parser ;
61+
62+ private ?HTML5 $ html5Parser = null ;
6263
6364 /**
6465 * @param \DOMNodeList|\DOMNode|\DOMNode[]|string|null $node A Node to use as the base for the crawling
6566 */
66- public function __construct (\DOMNodeList |\DOMNode |array |string $ node = null , string $ uri = null , string $ baseHref = null )
67+ public function __construct (\DOMNodeList |\DOMNode |array |string $ node = null , string $ uri = null , string $ baseHref = null , bool $ useHtml5Parser = true )
6768 {
6869 $ this ->uri = $ uri ;
6970 $ this ->baseHref = $ baseHref ?: $ uri ;
70- $ this ->html5Parser = new HTML5 (['disable_html_ns ' => true ]);
71+ $ this ->html5Parser = $ useHtml5Parser ? new HTML5 (['disable_html_ns ' => true ]) : null ;
7172 $ this ->cachedNamespaces = new \ArrayObject ();
7273
7374 $ this ->add ($ node );
@@ -621,7 +622,7 @@ public function html(string $default = null): string
621622 $ node = $ this ->getNode (0 );
622623 $ owner = $ node ->ownerDocument ;
623624
624- if ('<!DOCTYPE html> ' === $ owner ->saveXML ($ owner ->childNodes [0 ])) {
625+ if ($ this -> html5Parser && '<!DOCTYPE html> ' === $ owner ->saveXML ($ owner ->childNodes [0 ])) {
625626 $ owner = $ this ->html5Parser ;
626627 }
627628
@@ -642,7 +643,7 @@ public function outerHtml(): string
642643 $ node = $ this ->getNode (0 );
643644 $ owner = $ node ->ownerDocument ;
644645
645- if ('<!DOCTYPE html> ' === $ owner ->saveXML ($ owner ->childNodes [0 ])) {
646+ if ($ this -> html5Parser && '<!DOCTYPE html> ' === $ owner ->saveXML ($ owner ->childNodes [0 ])) {
646647 $ owner = $ this ->html5Parser ;
647648 }
648649
@@ -1215,6 +1216,10 @@ private function parseHtmlString(string $content, string $charset): \DOMDocument
12151216
12161217 private function canParseHtml5String (string $ content ): bool
12171218 {
1219+ if (!$ this ->html5Parser ) {
1220+ return false ;
1221+ }
1222+
12181223 if (false === ($ pos = stripos ($ content , '<!doctype html> ' ))) {
12191224 return false ;
12201225 }
0 commit comments