From ecc76abc6cd936b017b93d3a42d3cf1ff4908b02 Mon Sep 17 00:00:00 2001 From: OriginalAuthority Date: Thu, 11 Sep 2025 19:44:48 +0100 Subject: [PATCH 1/5] Validate the size of input et al --- extension.json | 22 ++++ i18n/en.json | 8 +- includes/ProgressTableProcessor.php | 162 +++++++++++++++++++++++++++- 3 files changed, 189 insertions(+), 3 deletions(-) diff --git a/extension.json b/extension.json index e78ca32..a6f7967 100644 --- a/extension.json +++ b/extension.json @@ -85,5 +85,27 @@ "TrackingCategories": [ "tpt-tracking-category" ], + "config": { + "TableProgressTrackingMaxRows": { + "description": "Integer. Maximum amount of rows that will be parsed before returning an error.", + "value": "100" + }, + "TableProgressTrackingMaxColumns": { + "description": "Integer. Maximum amount of columns that will be parsed before returning an error.", + "value": "10" + }, + "TableProgressTrackingMaxHTMLSize": { + "description": "Integer. Parser will bail when the parsed HTML reaches this value. Set to 25% of $wgMaxArticleSize by default", + "value": null + }, + "TableProgressTrackingMaxProcessingTime": { + "description": "Integer. Maximum number of seconds before the parser abandons parsing", + "value": 5 + }, + "TableProgressTrackingMaxWikitextSize": { + "description": "Integer. Maximum bytes of wikitext allowable for parsing. Default to 50KB", + "value": 50000 + } + }, "manifest_version": 2 } \ No newline at end of file diff --git a/i18n/en.json b/i18n/en.json index 4c71eef..c367098 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -6,5 +6,11 @@ }, "tableprogresstracking-desc": "An extension to track progress against MediaWiki tables.", "tableprogresstracking-duplicate-tables": "Duplicate tables found with the same table-id. The table-id must be unique across all tables.", - "tpt-tracking-category": "Pages using progress tables" + "tpt-tracking-category": "Pages using progress tables", + "tableprogresstracking-error-wikitext-size": "Input wikitext ($1) is larger than allowed ($2)", + "tableprogresstracking-error-html-size": "The resultant HTML ($1) is larger than allowed ($2)", + "tableprogresstracking-error-parsing-wikitext": "Processing timeout exceeded during wikitext parsing", + "tableprogresstracking-error-parsing-html": "Processing timeout exceeded during html parsing", + "tableprogresstracking-error-max-columns": "Table has too many columns. Maximum of $1 allowed", + "tableprogresstracking-error-max-rows": "Table has too many rows. Maximum of $1 allowed" } diff --git a/includes/ProgressTableProcessor.php b/includes/ProgressTableProcessor.php index 6dd07fd..583406d 100644 --- a/includes/ProgressTableProcessor.php +++ b/includes/ProgressTableProcessor.php @@ -7,6 +7,8 @@ use DOMXPath; use Exception; use MediaWiki\Html\Html; +use MediaWiki\MainConfigNames; +use MediaWiki\MediaWikiServices; use MediaWiki\Parser\Parser; use MediaWiki\Parser\PPFrame; @@ -57,6 +59,45 @@ class ProgressTableProcessor { */ private ?string $errorMessage = null; + /** + * Maximum amount of rows that will be parsed by this class before we bail + * $wgTableProgressTrackingMaxRows + * @var int + */ + private int $maxRows = 0; + + /** + * Same as above; $wgTableProgressTrackingMaxColumns + * @var int + */ + private int $maxColumns = 0; + + /** + * Maximum size of generated HTML in bytes before we abandon parsing the table + * $wgTableProgressTrackingMaxHTMLSize + * @var int + */ + private int $maxHTMLSize = 0; + + /** + * Maximum time we will spend in seconds processing and parsing the table + * $wgTableProgressTrackingMaxProcessingTime + * @var int + */ + private int $maxProcessingTime = 0; + + /** + * Time we began processing this wikitext + * @var float + */ + private float $startTime = 0.0; + + /** + * Maximum wikitext size we will try to parse before bailing + * @var int + */ + private int $maxInputSize = 0; + /** * Constructor * @@ -68,6 +109,30 @@ public function __construct( string $wikitext, array $args, Parser $parser, PPFr $this->parser = $parser; $this->frame = $frame; + $config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'TableProgressTracking' ); + + $maxArticleSize = $config->get( MainConfigNames::MaxArticleSize ); + + $this->maxColumns = $config->get( 'TableProgressTrackingMaxColumns' ); + $this->maxRows = $config->get( 'TableProgressTrackingMaxRows' ); + + // if this wasn't set, then allow us to take 25% of the max article size. + // in a default MediaWiki install, where $wgMaxArticleSize is unset, this will be 512KB + $this->maxHTMLSize = $config->get( 'TableProgressTrackingMaxHTMLSize' ) ?? ( 0.25 * $maxArticleSize ); + $this->maxProcessingTime = $config->get( 'TableProgressTrackingMaxProcessingTime' ); + + // maximum bytes of wikitext we will try and parse. We will allow parsing of either 50KB by default + // or whatever is configured through $wgTableProcessTrackingMaxInputSize. + // if the wikitext exceeds this, we bail + $this->maxInputSize = $config->get( 'TableProgressTrackingMaxInputSize' ); + + + $size = strlen( $wikitext ); + if ( $size > $this->maxInputSize ) { + $this->errorMessage = wfMessage( 'table-progress-tracking-max-size-limit', $size, number_format( $this->maxInputSize ) )->text(); + return; + } + // Only set the unique column index if it is provided in the arguments // if not, we validate later that each row passes its own data-row-id // note we must - 1 from the value the user passsed as an argument as @@ -82,8 +147,27 @@ public function __construct( string $wikitext, array $args, Parser $parser, PPFr $this->errorMessage = 'The table-id argument is required.'; return; } + } - $this->loadAndValidateHtml(); + /** + * Start the timer to measure how long we have been processing this table + * @return void + */ + private function startProcessingTimer(): void { + $this->startTime = microtime(true); + } + + /** + * Check if we've exceeded our processing time limit if we have + * we will bail + * @return bool + */ + private function checkTimeout(): bool { + if ( $this->startTime == 0.0 ) { + // we haven't started yet + return false; + } + return ( microtime( true ) - $this->startTime ) > $this->maxProcessingTime; } /** @@ -92,6 +176,14 @@ public function __construct( string $wikitext, array $args, Parser $parser, PPFr * @throws Exception */ private function loadAndValidateHtml(): void { + + $this->startProcessingTimer(); + + if ( $this->checkTimeout() ) { + // @TODO: wfMessage + $this->errorMessage = 'Processing timeout exceeded during initialisation.'; + return; + } // first parse our wikitext so we can get the HTML representation if it; // we use ->recursiveTagParseFully here as we need the final HTML version of the // table so that we can ensure if unique-column-index is used, and the content of the @@ -101,6 +193,18 @@ private function loadAndValidateHtml(): void { // parser that I can find. $tableHtml = $this->parser->recursiveTagParseFully( $this->wikitext, $this->frame ); + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-wikitext' )->text(); + return; + } + + $tableSize = strlen( $tableHtml ); + + if ( $tableSize > $this->maxHTMLSize ) { + $this->errorMessage = wfMessage( "tableprogresstracking-error-html-size", $tableSize, number_format( $this->maxHTMLSize ) ); + return; + } + if ( empty( trim( $tableHtml ) ) ) { $this->errorMessage = 'Parsing the wikitext resulted in empty HTML.'; return; @@ -115,6 +219,11 @@ private function loadAndValidateHtml(): void { LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD ); + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text(); + return; + } + $tableNode = $this->dom->getElementsByTagName( 'table' )->item( 0 ); if ( !$tableNode ) { @@ -146,15 +255,30 @@ private function validateUniqueColumnIndex(): void { $xpath = new DOMXPath( $this->dom ); $allRows = $xpath->query( './/tr', $this->table ); $maxColumns = 0; + $processedRows = 0; foreach ( $allRows as $row ) { + if ( $processedRows >= $this->maxRows || $this->checkTimeout() ) { + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text(); + return; + } + break; + } + $cellCount = $row->getElementsByTagName( 'td' )->length + $row->getElementsByTagName( 'th' )->length; + + if ( $cellCount > $this->maxColumns ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-max-columns', $this->maxColumns )->text(); + return; + } + $maxColumns = max( $maxColumns, $cellCount ); + $processedRows++; } if ( $this->uniqueColumnIndex >= $maxColumns ) { $this->errorMessage = "unique-column-index ({$this->uniqueColumnIndex}) is out of range. Table has {$maxColumns} columns (0-" . ( $maxColumns - 1 ) . ")."; - return; } } @@ -162,15 +286,30 @@ private function validateUniqueColumnIndex(): void { * Validates that all data rows have data-row-id attributes when unique-column-index is not provided */ private function validateDataRowIds(): bool { + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text(); + return false; + } $xpath = new DOMXPath( $this->dom ); $dataRows = $xpath->query( './/tr[not(th)]', $this->table ); + $processedRows = 0; foreach ( $dataRows as $row ) { + if ( $processedRows >= $this->maxRows || $this->checkTimeout() ) { + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text(); + } else { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-max-rows' )->text(); + } + return false; + } + $rowId = $this->extractDataRowId( $row ); if ( empty( $rowId ) ) { $this->errorMessage = 'When unique-column-index is not provided, all data rows must have a data-row-id attribute.'; return false; } + $processedRows++; } return true; @@ -208,8 +347,16 @@ private function extractDataRowId( DOMElement $row ): ?string { * (also modularrrr) * * @return string The final, processed HTML. + * @throws Exception */ public function process(): string { + // constructor may have returned an error already, so bail before we even start + if ( $this->hasError() ) { + return self::renderError( htmlspecialchars( $this->getErrorMessage() ) ); + } + + $this->loadAndValidateHtml(); + if ( $this->hasError() ) { return self::renderError( htmlspecialchars( $this->getErrorMessage() ) ); } @@ -229,6 +376,11 @@ public function process(): string { $this->processDataRows(); + // let's check the erorrs again incase $this->processDataRows exited unsuccessfully + if ( $this->hasError() ) { + return self::renderError( htmlspecialchars( $this->getErrorMessage() ) ); + } + // if we got this far, we can assume the table is valid and ready to be returned // lets add a tracking category also so we know which pages are using this extension $this->parser->addTrackingCategory( 'tpt-tracking-category' ); @@ -271,6 +423,12 @@ private function addProgressHeader(): void { * Iterates over all data rows (tr without th) and adds the checkbox cell to each. */ private function processDataRows(): void { + + if ( $this->checkTimeout() ) { + $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text(); + return; + } + $xpath = new DOMXPath( $this->dom ); // this is fucked, but this should be better than just trying to get the tr element with ->getElementByTagName('tr') as that will return all tr elements, including the header ones $dataRows = $xpath->query( './/tr[not(th)]', $this->table ); From aa727af2bf9e831f2df411324c93db42cb7417d9 Mon Sep 17 00:00:00 2001 From: OriginalAuthority Date: Thu, 11 Sep 2025 20:03:22 +0100 Subject: [PATCH 2/5] Fix conversion between bytes, kb, whatever it's wrong rn --- includes/ProgressTableProcessor.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/includes/ProgressTableProcessor.php b/includes/ProgressTableProcessor.php index 583406d..6a6429a 100644 --- a/includes/ProgressTableProcessor.php +++ b/includes/ProgressTableProcessor.php @@ -117,8 +117,8 @@ public function __construct( string $wikitext, array $args, Parser $parser, PPFr $this->maxRows = $config->get( 'TableProgressTrackingMaxRows' ); // if this wasn't set, then allow us to take 25% of the max article size. - // in a default MediaWiki install, where $wgMaxArticleSize is unset, this will be 512KB - $this->maxHTMLSize = $config->get( 'TableProgressTrackingMaxHTMLSize' ) ?? ( 0.25 * $maxArticleSize ); + // in a default MediaWiki install, where $wgMaxArticleSize is unset + $this->maxHTMLSize = $config->get( 'TableProgressTrackingMaxHTMLSize' ) ?? (int)( $maxArticleSize * 1024 * 0.25 ); $this->maxProcessingTime = $config->get( 'TableProgressTrackingMaxProcessingTime' ); // maximum bytes of wikitext we will try and parse. We will allow parsing of either 50KB by default From a6f8b4a61de1aa0a0c29feb4110bc4ffe516405b Mon Sep 17 00:00:00 2001 From: OriginalAuthority Date: Thu, 11 Sep 2025 20:32:32 +0100 Subject: [PATCH 3/5] Add builder for config --- extension.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/extension.json b/extension.json index a6f7967..5e3965c 100644 --- a/extension.json +++ b/extension.json @@ -106,6 +106,9 @@ "description": "Integer. Maximum bytes of wikitext allowable for parsing. Default to 50KB", "value": 50000 } + }, + "ConfigRegistry": { + "TableProgressTracking": "MediaWiki\\Config\\GlobalVarConfig::newInstance" }, "manifest_version": 2 } \ No newline at end of file From 8cf0230313ef234b5d6d2017e04c8183d26117a1 Mon Sep 17 00:00:00 2001 From: OriginalAuthority Date: Thu, 11 Sep 2025 20:46:48 +0100 Subject: [PATCH 4/5] Fix logic here --- extension.json | 6 +++--- includes/ProgressTableProcessor.php | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/extension.json b/extension.json index 5e3965c..526ad43 100644 --- a/extension.json +++ b/extension.json @@ -88,11 +88,11 @@ "config": { "TableProgressTrackingMaxRows": { "description": "Integer. Maximum amount of rows that will be parsed before returning an error.", - "value": "100" + "value": 100 }, "TableProgressTrackingMaxColumns": { "description": "Integer. Maximum amount of columns that will be parsed before returning an error.", - "value": "10" + "value": 10 }, "TableProgressTrackingMaxHTMLSize": { "description": "Integer. Parser will bail when the parsed HTML reaches this value. Set to 25% of $wgMaxArticleSize by default", @@ -102,7 +102,7 @@ "description": "Integer. Maximum number of seconds before the parser abandons parsing", "value": 5 }, - "TableProgressTrackingMaxWikitextSize": { + "TableProgressTrackingMaxInputSize": { "description": "Integer. Maximum bytes of wikitext allowable for parsing. Default to 50KB", "value": 50000 } diff --git a/includes/ProgressTableProcessor.php b/includes/ProgressTableProcessor.php index 6a6429a..1a5031d 100644 --- a/includes/ProgressTableProcessor.php +++ b/includes/ProgressTableProcessor.php @@ -129,7 +129,7 @@ public function __construct( string $wikitext, array $args, Parser $parser, PPFr $size = strlen( $wikitext ); if ( $size > $this->maxInputSize ) { - $this->errorMessage = wfMessage( 'table-progress-tracking-max-size-limit', $size, number_format( $this->maxInputSize ) )->text(); + $this->errorMessage = wfMessage( 'tableprogresstracking-error-wikitext-size', $size, number_format( $this->maxInputSize ) )->text(); return; } @@ -227,7 +227,6 @@ private function loadAndValidateHtml(): void { $tableNode = $this->dom->getElementsByTagName( 'table' )->item( 0 ); if ( !$tableNode ) { - $this->parser->getOutput()->updateCacheExpiry( 0 ); $this->errorMessage = 'No table was provided for progress tracking. Please include a table between the tags.'; return; } @@ -258,11 +257,11 @@ private function validateUniqueColumnIndex(): void { $processedRows = 0; foreach ( $allRows as $row ) { - if ( $processedRows >= $this->maxRows || $this->checkTimeout() ) { + if ( $processedRows > $this->maxRows || $this->checkTimeout() ) { if ( $this->checkTimeout() ) { $this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text(); - return; } + $this->errorMessage = wfMessage( 'tableprogresstracking-error-max-rows', $this->maxRows )->text(); break; } @@ -352,18 +351,18 @@ private function extractDataRowId( DOMElement $row ): ?string { public function process(): string { // constructor may have returned an error already, so bail before we even start if ( $this->hasError() ) { - return self::renderError( htmlspecialchars( $this->getErrorMessage() ) ); + return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) ); } $this->loadAndValidateHtml(); if ( $this->hasError() ) { - return self::renderError( htmlspecialchars( $this->getErrorMessage() ) ); + return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) ); } // If no unique-column-index is provided, validate that all rows have data-row-id if ( $this->uniqueColumnIndex === null && !$this->validateDataRowIds() ) { - return self::renderError( htmlspecialchars( $this->getErrorMessage() ) ); + return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) ); } $this->setTableAttributes(); @@ -378,7 +377,7 @@ public function process(): string { // let's check the erorrs again incase $this->processDataRows exited unsuccessfully if ( $this->hasError() ) { - return self::renderError( htmlspecialchars( $this->getErrorMessage() ) ); + return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) ); } // if we got this far, we can assume the table is valid and ready to be returned @@ -558,8 +557,9 @@ private function generateFinalHtml(): string { * @param string $message The error message to display. * @return string */ - private static function renderError( string $message ): string { + private function renderError( string $message ): string { $escapedMessage = htmlspecialchars( $message ); + $this->parser->getOutput()->updateCacheExpiry( 0 ); return Html::errorBox( $escapedMessage ); } From ac61c1b245945e9da804eab8640d658e307dcb64 Mon Sep 17 00:00:00 2001 From: OriginalAuthority Date: Thu, 11 Sep 2025 20:51:41 +0100 Subject: [PATCH 5/5] Bump version --- extension.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension.json b/extension.json index 526ad43..82f794e 100644 --- a/extension.json +++ b/extension.json @@ -7,7 +7,7 @@ "descriptionmsg": "tableprogresstracking-desc", "license-name": "Apache-2.0", "type": "parserhook", - "version": "1.1.1", + "version": "1.2.0", "requires": { "MediaWiki": ">= 1.43.0" },