diff --git a/extension.json b/extension.json index e78ca32..82f794e 100644 --- a/extension.json +++ b/extension.json @@ -7,7 +7,7 @@ "descriptionmsg": "tableprogresstracking-desc", "license-name": "Apache-2.0", "type": "parserhook", - "version": "1.1.1", + "version": "1.2.0", "requires": { "MediaWiki": ">= 1.43.0" }, @@ -85,5 +85,30 @@ "TrackingCategories": [ "tpt-tracking-category" ], + "config": { + "TableProgressTrackingMaxRows": { + "description": "Integer. Maximum amount of rows that will be parsed before returning an error.", + "value": 100 + }, + "TableProgressTrackingMaxColumns": { + "description": "Integer. Maximum amount of columns that will be parsed before returning an error.", + "value": 10 + }, + "TableProgressTrackingMaxHTMLSize": { + "description": "Integer. Parser will bail when the parsed HTML reaches this value. Set to 25% of $wgMaxArticleSize by default", + "value": null + }, + "TableProgressTrackingMaxProcessingTime": { + "description": "Integer. Maximum number of seconds before the parser abandons parsing", + "value": 5 + }, + "TableProgressTrackingMaxInputSize": { + "description": "Integer. Maximum bytes of wikitext allowable for parsing. Default to 50KB", + "value": 50000 + } + }, + "ConfigRegistry": { + "TableProgressTracking": "MediaWiki\\Config\\GlobalVarConfig::newInstance" + }, "manifest_version": 2 } \ No newline at end of file diff --git a/i18n/en.json b/i18n/en.json index 4c71eef..c367098 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -6,5 +6,11 @@ }, "tableprogresstracking-desc": "An extension to track progress against MediaWiki tables.", "tableprogresstracking-duplicate-tables": "Duplicate tables found with the same table-id. The table-id must be unique across all tables.", - "tpt-tracking-category": "Pages using progress tables" + "tpt-tracking-category": "Pages using progress tables", + "tableprogresstracking-error-wikitext-size": "Input wikitext ($1) is larger than allowed ($2)", + "tableprogresstracking-error-html-size": "The resultant HTML ($1) is larger than allowed ($2)", + "tableprogresstracking-error-parsing-wikitext": "Processing timeout exceeded during wikitext parsing", + "tableprogresstracking-error-parsing-html": "Processing timeout exceeded during html parsing", + "tableprogresstracking-error-max-columns": "Table has too many columns. Maximum of $1 allowed", + "tableprogresstracking-error-max-rows": "Table has too many rows. Maximum of $1 allowed" } diff --git a/includes/ProgressTableProcessor.php b/includes/ProgressTableProcessor.php index 6dd07fd..1a5031d 100644 --- a/includes/ProgressTableProcessor.php +++ b/includes/ProgressTableProcessor.php @@ -7,6 +7,8 @@ use DOMXPath; use Exception; use MediaWiki\Html\Html; +use MediaWiki\MainConfigNames; +use MediaWiki\MediaWikiServices; use MediaWiki\Parser\Parser; use MediaWiki\Parser\PPFrame; @@ -57,6 +59,45 @@ class ProgressTableProcessor { */ private ?string $errorMessage = null; + /** + * Maximum amount of rows that will be parsed by this class before we bail + * $wgTableProgressTrackingMaxRows + * @var int + */ + private int $maxRows = 0; + + /** + * Same as above; $wgTableProgressTrackingMaxColumns + * @var int + */ + private int $maxColumns = 0; + + /** + * Maximum size of generated HTML in bytes before we abandon parsing the table + * $wgTableProgressTrackingMaxHTMLSize + * @var int + */ + private int $maxHTMLSize = 0; + + /** + * Maximum time we will spend in seconds processing and parsing the table + * $wgTableProgressTrackingMaxProcessingTime + * @var int + */ + private int $maxProcessingTime = 0; + + /** + * Time we began processing this wikitext + * @var float + */ + private float $startTime = 0.0; + + /** + * Maximum wikitext size we will try to parse before bailing + * @var int + */ + private int $maxInputSize = 0; + /** * Constructor * @@ -68,6 +109,30 @@ public function __construct( string $wikitext, array $args, Parser $parser, PPFr $this->parser = $parser; $this->frame = $frame; + $config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'TableProgressTracking' ); + + $maxArticleSize = $config->get( MainConfigNames::MaxArticleSize ); + + $this->maxColumns = $config->get( 'TableProgressTrackingMaxColumns' ); + $this->maxRows = $config->get( 'TableProgressTrackingMaxRows' ); + + // if this wasn't set, then allow us to take 25% of the max article size. + // in a default MediaWiki install, where $wgMaxArticleSize is unset + $this->maxHTMLSize = $config->get( 'TableProgressTrackingMaxHTMLSize' ) ?? (int)( $maxArticleSize * 1024 * 0.25 ); + $this->maxProcessingTime = $config->get( 'TableProgressTrackingMaxProcessingTime' ); + + // maximum bytes of wikitext we will try and parse. We will allow parsing of either 50KB by default + // or whatever is configured through $wgTableProcessTrackingMaxInputSize. + // if the wikitext exceeds this, we bail + $this->maxInputSize = $config->get( 'TableProgressTrackingMaxInputSize' ); + + + $size = strlen( $wikitext ); + if ( $size > $this->maxInputSize ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-wikitext-size', $size, number_format( $this->maxInputSize ) )->text(); + return; + } + // Only set the unique column index if it is provided in the arguments // if not, we validate later that each row passes its own data-row-id // note we must - 1 from the value the user passsed as an argument as @@ -82,8 +147,27 @@ public function __construct( string $wikitext, array $args, Parser $parser, PPFr $this->errorMessage = 'The table-id argument is required.'; return; } + } - $this->loadAndValidateHtml(); + /** + * Start the timer to measure how long we have been processing this table + * @return void + */ + private function startProcessingTimer(): void { + $this->startTime = microtime(true); + } + + /** + * Check if we've exceeded our processing time limit if we have + * we will bail + * @return bool + */ + private function checkTimeout(): bool { + if ( $this->startTime == 0.0 ) { + // we haven't started yet + return false; + } + return ( microtime( true ) - $this->startTime ) > $this->maxProcessingTime; } /** @@ -92,6 +176,14 @@ public function __construct( string $wikitext, array $args, Parser $parser, PPFr * @throws Exception */ private function loadAndValidateHtml(): void { + + $this->startProcessingTimer(); + + if ( $this->checkTimeout() ) { + // @TODO: wfMessage + $this->errorMessage = 'Processing timeout exceeded during initialisation.'; + return; + } // first parse our wikitext so we can get the HTML representation if it; // we use ->recursiveTagParseFully here as we need the final HTML version of the // table so that we can ensure if unique-column-index is used, and the content of the @@ -101,6 +193,18 @@ private function loadAndValidateHtml(): void { // parser that I can find. $tableHtml = $this->parser->recursiveTagParseFully( $this->wikitext, $this->frame ); + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-wikitext' )->text(); + return; + } + + $tableSize = strlen( $tableHtml ); + + if ( $tableSize > $this->maxHTMLSize ) { + $this->errorMessage = wfMessage( "tableprogresstracking-error-html-size", $tableSize, number_format( $this->maxHTMLSize ) ); + return; + } + if ( empty( trim( $tableHtml ) ) ) { $this->errorMessage = 'Parsing the wikitext resulted in empty HTML.'; return; @@ -115,10 +219,14 @@ private function loadAndValidateHtml(): void { LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD ); + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text(); + return; + } + $tableNode = $this->dom->getElementsByTagName( 'table' )->item( 0 ); if ( !$tableNode ) { - $this->parser->getOutput()->updateCacheExpiry( 0 ); $this->errorMessage = 'No table was provided for progress tracking. Please include a table between the tags.'; return; } @@ -146,15 +254,30 @@ private function validateUniqueColumnIndex(): void { $xpath = new DOMXPath( $this->dom ); $allRows = $xpath->query( './/tr', $this->table ); $maxColumns = 0; + $processedRows = 0; foreach ( $allRows as $row ) { + if ( $processedRows > $this->maxRows || $this->checkTimeout() ) { + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text(); + } + $this->errorMessage = wfMessage( 'tableprogresstracking-error-max-rows', $this->maxRows )->text(); + break; + } + $cellCount = $row->getElementsByTagName( 'td' )->length + $row->getElementsByTagName( 'th' )->length; + + if ( $cellCount > $this->maxColumns ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-max-columns', $this->maxColumns )->text(); + return; + } + $maxColumns = max( $maxColumns, $cellCount ); + $processedRows++; } if ( $this->uniqueColumnIndex >= $maxColumns ) { $this->errorMessage = "unique-column-index ({$this->uniqueColumnIndex}) is out of range. Table has {$maxColumns} columns (0-" . ( $maxColumns - 1 ) . ")."; - return; } } @@ -162,15 +285,30 @@ private function validateUniqueColumnIndex(): void { * Validates that all data rows have data-row-id attributes when unique-column-index is not provided */ private function validateDataRowIds(): bool { + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text(); + return false; + } $xpath = new DOMXPath( $this->dom ); $dataRows = $xpath->query( './/tr[not(th)]', $this->table ); + $processedRows = 0; foreach ( $dataRows as $row ) { + if ( $processedRows >= $this->maxRows || $this->checkTimeout() ) { + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text(); + } else { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-max-rows' )->text(); + } + return false; + } + $rowId = $this->extractDataRowId( $row ); if ( empty( $rowId ) ) { $this->errorMessage = 'When unique-column-index is not provided, all data rows must have a data-row-id attribute.'; return false; } + $processedRows++; } return true; @@ -208,15 +346,23 @@ private function extractDataRowId( DOMElement $row ): ?string { * (also modularrrr) * * @return string The final, processed HTML. + * @throws Exception */ public function process(): string { + // constructor may have returned an error already, so bail before we even start if ( $this->hasError() ) { - return self::renderError( htmlspecialchars( $this->getErrorMessage() ) ); + return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) ); + } + + $this->loadAndValidateHtml(); + + if ( $this->hasError() ) { + return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) ); } // If no unique-column-index is provided, validate that all rows have data-row-id if ( $this->uniqueColumnIndex === null && !$this->validateDataRowIds() ) { - return self::renderError( htmlspecialchars( $this->getErrorMessage() ) ); + return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) ); } $this->setTableAttributes(); @@ -229,6 +375,11 @@ public function process(): string { $this->processDataRows(); + // let's check the erorrs again incase $this->processDataRows exited unsuccessfully + if ( $this->hasError() ) { + return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) ); + } + // if we got this far, we can assume the table is valid and ready to be returned // lets add a tracking category also so we know which pages are using this extension $this->parser->addTrackingCategory( 'tpt-tracking-category' ); @@ -271,6 +422,12 @@ private function addProgressHeader(): void { * Iterates over all data rows (tr without th) and adds the checkbox cell to each. */ private function processDataRows(): void { + + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text(); + return; + } + $xpath = new DOMXPath( $this->dom ); // this is fucked, but this should be better than just trying to get the tr element with ->getElementByTagName('tr') as that will return all tr elements, including the header ones $dataRows = $xpath->query( './/tr[not(th)]', $this->table ); @@ -400,8 +557,9 @@ private function generateFinalHtml(): string { * @param string $message The error message to display. * @return string */ - private static function renderError( string $message ): string { + private function renderError( string $message ): string { $escapedMessage = htmlspecialchars( $message ); + $this->parser->getOutput()->updateCacheExpiry( 0 ); return Html::errorBox( $escapedMessage ); }