-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFeedsXPathParserHTML.inc
More file actions
43 lines (40 loc) · 1.2 KB
/
FeedsXPathParserHTML.inc
File metadata and controls
43 lines (40 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
<?php
/**
* @file
* Provides the FeedsXPathParserHTML class.
*/
class FeedsXPathParserHTML extends FeedsXPathParserBase {

  /**
   * Implements FeedsXPathParserBase::setup().
   *
   * Builds a DOMDocument from the fetched HTML, optionally running the raw
   * markup through HTML Tidy first.
   *
   * @param array $source_config
   *   The source configuration. The following keys under 'exp' are read:
   *   - tidy: When non-empty, the raw HTML is repaired with tidy before
   *     parsing (only if the tidy extension is available).
   *   - tidy_encoding: Encoding name handed to tidy. Defaults to 'UTF8' when
   *     missing or empty.
   *   - errors: Passed through to errorStop() as its second argument.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result whose raw payload is parsed.
   *
   * @return DOMDocument
   *   The parsed HTML document.
   *
   * @throws Exception
   *   If the payload is empty or the HTML document could not be loaded.
   */
  protected function setup($source_config, FeedsFetcherResult $fetcher_result) {
    $exp = isset($source_config['exp']) ? $source_config['exp'] : array();

    // Only tidy when requested AND the extension is actually loaded; a
    // configuration imported from another host must not cause a fatal error.
    if (!empty($exp['tidy']) && function_exists('tidy_repair_string')) {
      // Restructuring options (div/span merging, style joining, dropping of
      // empty paragraphs, line wrapping, tidy meta tag) are switched off.
      $config = array(
        'merge-divs' => FALSE,
        'merge-spans' => FALSE,
        'join-styles' => FALSE,
        'drop-empty-paras' => FALSE,
        'wrap' => 0,
        'tidy-mark' => FALSE,
        'escape-cdata' => TRUE,
        'word-2000' => TRUE,
      );
      // Default tidy encoding is UTF8.
      $encoding = !empty($exp['tidy_encoding']) ? $exp['tidy_encoding'] : 'UTF8';
      $raw = tidy_repair_string(trim($fetcher_result->getRaw()), $config, $encoding);
    }
    else {
      $raw = $fetcher_result->getRaw();
    }

    // DOMDocument::loadHTML() warns (older PHP) or throws ValueError (PHP 8)
    // on empty input, and tidy_repair_string() may return FALSE. Report both
    // cases through the same exception as any other parse failure, before the
    // error handling state is touched.
    if ((string) $raw === '') {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }

    $doc = new DOMDocument();
    // Use our own error handling.
    $use = $this->errorStart();
    $success = $doc->loadHTML($raw);
    // The raw markup can be large; release it as soon as it has been parsed.
    unset($raw);
    // Fall back to NULL when the option is missing so no undefined-index
    // notice is raised.
    $this->errorStop($use, isset($exp['errors']) ? $exp['errors'] : NULL);

    if (!$success) {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }
    return $doc;
  }

}