Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion extension.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"descriptionmsg": "tableprogresstracking-desc",
"license-name": "Apache-2.0",
"type": "parserhook",
"version": "1.1.1",
"version": "1.2.0",
"requires": {
"MediaWiki": ">= 1.43.0"
},
Expand Down Expand Up @@ -85,5 +85,30 @@
"TrackingCategories": [
"tpt-tracking-category"
],
"config": {
"TableProgressTrackingMaxRows": {
"description": "Integer. Maximum number of rows that will be parsed before returning an error.",
"value": 100
},
"TableProgressTrackingMaxColumns": {
"description": "Integer. Maximum number of columns that will be parsed before returning an error.",
"value": 10
},
"TableProgressTrackingMaxHTMLSize": {
"description": "Integer. Maximum size in bytes of the parsed HTML; the parser will bail when the parsed HTML exceeds this value. Defaults to 25% of $wgMaxArticleSize when set to null.",
"value": null
},
"TableProgressTrackingMaxProcessingTime": {
"description": "Integer. Maximum number of seconds before the parser abandons parsing",
"value": 5
},
"TableProgressTrackingMaxInputSize": {
"description": "Integer. Maximum number of bytes of wikitext allowed for parsing. Defaults to 50 KB (50000 bytes).",
"value": 50000
}
},
"ConfigRegistry": {
"TableProgressTracking": "MediaWiki\\Config\\GlobalVarConfig::newInstance"
},
"manifest_version": 2
}
8 changes: 7 additions & 1 deletion i18n/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,11 @@
},
"tableprogresstracking-desc": "An extension to track progress against MediaWiki tables.",
"tableprogresstracking-duplicate-tables": "Duplicate tables found with the same table-id. The table-id must be unique across all tables.",
"tpt-tracking-category": "Pages using progress tables"
"tpt-tracking-category": "Pages using progress tables",
"tableprogresstracking-error-wikitext-size": "Input wikitext ($1) is larger than allowed ($2)",
"tableprogresstracking-error-html-size": "The resultant HTML ($1) is larger than allowed ($2)",
"tableprogresstracking-error-parsing-wikitext": "Processing timeout exceeded during wikitext parsing",
"tableprogresstracking-error-parsing-html": "Processing timeout exceeded during html parsing",
"tableprogresstracking-error-max-columns": "Table has too many columns. Maximum of $1 allowed",
"tableprogresstracking-error-max-rows": "Table has too many rows. Maximum of $1 allowed"
}
170 changes: 164 additions & 6 deletions includes/ProgressTableProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
use DOMXPath;
use Exception;
use MediaWiki\Html\Html;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Parser\Parser;
use MediaWiki\Parser\PPFrame;

Expand Down Expand Up @@ -57,6 +59,45 @@ class ProgressTableProcessor {
*/
private ?string $errorMessage = null;

/**
* Maximum number of table rows this class will process before bailing with an error.
* Populated in the constructor from $wgTableProgressTrackingMaxRows.
* @var int
*/
private int $maxRows = 0;

/**
* Maximum number of cells (td + th) allowed in a single row before bailing with an error.
* Populated in the constructor from $wgTableProgressTrackingMaxColumns.
* @var int
*/
private int $maxColumns = 0;

/**
* Maximum size of generated HTML in bytes before we abandon parsing the table.
* Populated in the constructor from $wgTableProgressTrackingMaxHTMLSize; when that
* setting is null, the constructor falls back to 25% of $wgMaxArticleSize * 1024.
* @var int
*/
private int $maxHTMLSize = 0;

/**
* Maximum time we will spend in seconds processing and parsing the table.
* Populated in the constructor from $wgTableProgressTrackingMaxProcessingTime.
* @var int
*/
private int $maxProcessingTime = 0;

/**
* Time we began processing this wikitext, as returned by microtime( true ).
* Stays at 0.0 until the processing timer is started; checkTimeout() treats
* 0.0 as "not started yet" and never reports a timeout in that state.
* @var float
*/
private float $startTime = 0.0;

/**
* Maximum wikitext size in bytes (compared against strlen() of the input)
* that we will try to parse before bailing.
* Populated in the constructor from $wgTableProgressTrackingMaxInputSize.
* @var int
*/
private int $maxInputSize = 0;

/**
* Constructor
*
Expand All @@ -68,6 +109,30 @@ public function __construct( string $wikitext, array $args, Parser $parser, PPFr
$this->parser = $parser;
$this->frame = $frame;

$config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'TableProgressTracking' );

$maxArticleSize = $config->get( MainConfigNames::MaxArticleSize );

$this->maxColumns = $config->get( 'TableProgressTrackingMaxColumns' );
$this->maxRows = $config->get( 'TableProgressTrackingMaxRows' );

// if $wgTableProgressTrackingMaxHTMLSize wasn't set (null), allow the HTML to take up to 25% of
// $wgMaxArticleSize (configured in kibibytes, hence the * 1024); in a default MediaWiki install that is 512 KiB
$this->maxHTMLSize = $config->get( 'TableProgressTrackingMaxHTMLSize' ) ?? (int)( $maxArticleSize * 1024 * 0.25 );
$this->maxProcessingTime = $config->get( 'TableProgressTrackingMaxProcessingTime' );

// maximum bytes of wikitext we will try and parse. We will allow parsing of either 50KB by default
// or whatever is configured through $wgTableProgressTrackingMaxInputSize.
// if the wikitext exceeds this, we bail
$this->maxInputSize = $config->get( 'TableProgressTrackingMaxInputSize' );


$size = strlen( $wikitext );
if ( $size > $this->maxInputSize ) {
$this->errorMessage = wfMessage( 'tableprogresstracking-error-wikitext-size', $size, number_format( $this->maxInputSize ) )->text();
return;
}

// Only set the unique column index if it is provided in the arguments
// if not, we validate later that each row passes its own data-row-id
// note we must - 1 from the value the user passed as an argument as
Expand All @@ -82,8 +147,27 @@ public function __construct( string $wikitext, array $args, Parser $parser, PPFr
$this->errorMessage = 'The table-id argument is required.';
return;
}
}

$this->loadAndValidateHtml();
/**
 * Record the current time as the moment processing of this table began.
 *
 * The stored value is later compared against the current time by
 * checkTimeout() to decide whether the processing budget is spent.
 *
 * @return void
 */
private function startProcessingTimer(): void {
	$now = microtime( true );
	$this->startTime = $now;
}

/**
 * Report whether the processing time limit has been exceeded.
 *
 * Callers are expected to set an error message and bail when this returns true.
 *
 * @return bool True when more than $this->maxProcessingTime seconds have elapsed
 *  since the timer was started; false otherwise, or when the timer was never started.
 */
private function checkTimeout(): bool {
	// 0.0 is the property's initial value, meaning the timer has not been started yet
	if ( $this->startTime === 0.0 ) {
		return false;
	}

	$elapsedSeconds = microtime( true ) - $this->startTime;

	return $elapsedSeconds > $this->maxProcessingTime;
}

/**
Expand All @@ -92,6 +176,14 @@ public function __construct( string $wikitext, array $args, Parser $parser, PPFr
* @throws Exception
*/
private function loadAndValidateHtml(): void {

$this->startProcessingTimer();

if ( $this->checkTimeout() ) {
// @TODO: wfMessage
$this->errorMessage = 'Processing timeout exceeded during initialisation.';
return;
}
// first parse our wikitext so we can get the HTML representation of it;
// we use ->recursiveTagParseFully here as we need the final HTML version of the
// table so that we can ensure if unique-column-index is used, and the content of the
Expand All @@ -101,6 +193,18 @@ private function loadAndValidateHtml(): void {
// parser that I can find.
$tableHtml = $this->parser->recursiveTagParseFully( $this->wikitext, $this->frame );

if ( $this->checkTimeout() ) {
$this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-wikitext' )->text();
return;
}

$tableSize = strlen( $tableHtml );

if ( $tableSize > $this->maxHTMLSize ) {
$this->errorMessage = wfMessage( "tableprogresstracking-error-html-size", $tableSize, number_format( $this->maxHTMLSize ) );
return;
}

if ( empty( trim( $tableHtml ) ) ) {
$this->errorMessage = 'Parsing the wikitext resulted in empty HTML.';
return;
Expand All @@ -115,10 +219,14 @@ private function loadAndValidateHtml(): void {
LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
);

if ( $this->checkTimeout() ) {
$this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text();
return;
}

$tableNode = $this->dom->getElementsByTagName( 'table' )->item( 0 );

if ( !$tableNode ) {
$this->parser->getOutput()->updateCacheExpiry( 0 );
$this->errorMessage = 'No table was provided for progress tracking. Please include a table between the <table-progress-tracking> tags.';
return;
}
Expand Down Expand Up @@ -146,31 +254,61 @@ private function validateUniqueColumnIndex(): void {
$xpath = new DOMXPath( $this->dom );
$allRows = $xpath->query( './/tr', $this->table );
$maxColumns = 0;
$processedRows = 0;

foreach ( $allRows as $row ) {
if ( $processedRows > $this->maxRows || $this->checkTimeout() ) {
if ( $this->checkTimeout() ) {
$this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text();
}
$this->errorMessage = wfMessage( 'tableprogresstracking-error-max-rows', $this->maxRows )->text();
break;
}

$cellCount = $row->getElementsByTagName( 'td' )->length + $row->getElementsByTagName( 'th' )->length;

if ( $cellCount > $this->maxColumns ) {
$this->errorMessage = wfMessage( 'tableprogresstracking-error-max-columns', $this->maxColumns )->text();
return;
}

$maxColumns = max( $maxColumns, $cellCount );
$processedRows++;
}

if ( $this->uniqueColumnIndex >= $maxColumns ) {
$this->errorMessage = "unique-column-index ({$this->uniqueColumnIndex}) is out of range. Table has {$maxColumns} columns (0-" . ( $maxColumns - 1 ) . ").";
return;
}
}

/**
* Validates that all data rows have data-row-id attributes when unique-column-index is not provided
*/
private function validateDataRowIds(): bool {
if ( $this->checkTimeout() ) {
$this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text();
return false;
}
$xpath = new DOMXPath( $this->dom );
$dataRows = $xpath->query( './/tr[not(th)]', $this->table );
$processedRows = 0;

foreach ( $dataRows as $row ) {
if ( $processedRows >= $this->maxRows || $this->checkTimeout() ) {
if ( $this->checkTimeout() ) {
$this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text();
} else {
$this->errorMessage = wfMessage( 'tableprogresstracking-error-max-rows' )->text();
}
return false;
}

$rowId = $this->extractDataRowId( $row );
if ( empty( $rowId ) ) {
$this->errorMessage = 'When unique-column-index is not provided, all data rows must have a data-row-id attribute.';
return false;
}
$processedRows++;
}

return true;
Expand Down Expand Up @@ -208,15 +346,23 @@ private function extractDataRowId( DOMElement $row ): ?string {
* (also modularrrr)
*
* @return string The final, processed HTML.
* @throws Exception
*/
public function process(): string {
// constructor may have returned an error already, so bail before we even start
if ( $this->hasError() ) {
return self::renderError( htmlspecialchars( $this->getErrorMessage() ) );
return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) );
}

$this->loadAndValidateHtml();

if ( $this->hasError() ) {
return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) );
}

// If no unique-column-index is provided, validate that all rows have data-row-id
if ( $this->uniqueColumnIndex === null && !$this->validateDataRowIds() ) {
return self::renderError( htmlspecialchars( $this->getErrorMessage() ) );
return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) );
}

$this->setTableAttributes();
Expand All @@ -229,6 +375,11 @@ public function process(): string {

$this->processDataRows();

// let's check the errors again in case $this->processDataRows exited unsuccessfully
if ( $this->hasError() ) {
return $this->renderError( htmlspecialchars( $this->getErrorMessage() ) );
}

// if we got this far, we can assume the table is valid and ready to be returned
// lets add a tracking category also so we know which pages are using this extension
$this->parser->addTrackingCategory( 'tpt-tracking-category' );
Expand Down Expand Up @@ -271,6 +422,12 @@ private function addProgressHeader(): void {
* Iterates over all data rows (tr without th) and adds the checkbox cell to each.
*/
private function processDataRows(): void {

if ( $this->checkTimeout() ) {
$this->errorMessage = wfMessage( 'tableprogresstracking-error-parsing-html' )->text();
return;
}

$xpath = new DOMXPath( $this->dom );
// this XPath query is awkward, but it should be better than just trying to get the tr element with ->getElementsByTagName('tr') as that will return all tr elements, including the header ones
$dataRows = $xpath->query( './/tr[not(th)]', $this->table );
Expand Down Expand Up @@ -400,8 +557,9 @@ private function generateFinalHtml(): string {
* @param string $message The error message to display.
* @return string
*/
private static function renderError( string $message ): string {
private function renderError( string $message ): string {
// NOTE(review): the callers visible in process() pass text that has already been run
// through htmlspecialchars(), so escaping again here double-encodes entities
// (e.g. & becomes &amp;amp;) — confirm which side should own the escaping.
$escapedMessage = htmlspecialchars( $message );
// Set the parser output's cache expiry to 0; presumably so a page that rendered an
// error is not served from the parser cache — TODO confirm.
$this->parser->getOutput()->updateCacheExpiry( 0 );
// Wrap the escaped message with Html::errorBox() and return the resulting HTML.
return Html::errorBox( $escapedMessage );
}

Expand Down