From f07605f9363134b2de6f529e42e34c562bdd2b67 Mon Sep 17 00:00:00 2001 From: Ramon Date: Mon, 9 Mar 2026 15:41:48 +1100 Subject: [PATCH] =?UTF-8?q?CSS=20API:=20Add=20WP=5FCSS=5FToken=5FProcessor?= =?UTF-8?q?=20=E2=80=94=20streaming=20CSS=20tokenizer=20with=20sanitize()?= =?UTF-8?q?=20and=20validate()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces `WP_CSS_Token_Processor`, a new class in `src/wp-includes/css-api/` modelled after `WP_HTML_Tag_Processor`. It tokenizes a CSS string into a typed token stream and exposes two high-level consumers: - `sanitize(): string` — strips unsafe tokens/rules (injection guard, CDO/CDC, bad tokens, disallowed URL schemes, non-allowlisted at-rules) and returns a safe CSS string. Idempotent: sanitize(sanitize($css)) === sanitize($css). - `validate(): true|WP_Error` — returns true if the CSS is safe, or a WP_Error with a specific error code (css_injection, css_html_comment, css_malformed_token, css_unsafe_url, css_disallowed_at_rule) on the first violation found. The primary motivation is fixing the compounding corruption bug (PR #11104) where wp_kses() — an HTML sanitizer — was applied to CSS, mangling & and > characters used in CSS nesting selectors on each save for users without unfiltered_html. Security policy: - ) → stripped - Null bytes → stripped in constructor Allowed at-rules: @media, @supports, @keyframes, @-webkit-keyframes, @layer, @container, @font-face. Also adds low-level navigation (next_token, get_token_type, get_token_value, get_block_depth) and non-destructive modification (remove_token, set_token_value, get_updated_css) APIs, plus get_removed_tokens() for sanitize() introspection. Integration with filter_block_kses_value() in blocks.php is a follow-on PR. 
Includes: - src/wp-includes/css-api/class-wp-css-token-processor.php (~1,250 lines) - src/wp-includes/css-api/README.md - tests/phpunit/tests/css-api/WpCssTokenProcessorTest.php (67 tests) - tests/phpunit/tests/css-api/WpCssTokenSanitizeTest.php (40 tests) - tests/phpunit/tests/css-api/WpCssTokenValidateTest.php (14 tests + data provider) - docs/plans/2026-03-06-wp-css-token-processor-design.md - docs/plans/2026-03-06-wp-css-token-processor.md Fixes #64771 Co-Authored-By: Claude Sonnet 4.6 --- ...026-03-06-wp-css-token-processor-design.md | 303 +++ .../2026-03-06-wp-css-token-processor.md | 1881 +++++++++++++++++ src/wp-includes/css-api/README.md | 72 + .../css-api/class-wp-css-token-processor.php | 1397 ++++++++++++ src/wp-settings.php | 1 + .../tests/css-api/WpCssTokenProcessorTest.php | 1123 ++++++++++ .../tests/css-api/WpCssTokenSanitizeTest.php | 310 +++ .../tests/css-api/WpCssTokenValidateTest.php | 186 ++ 8 files changed, 5273 insertions(+) create mode 100644 docs/plans/2026-03-06-wp-css-token-processor-design.md create mode 100644 docs/plans/2026-03-06-wp-css-token-processor.md create mode 100644 src/wp-includes/css-api/README.md create mode 100644 src/wp-includes/css-api/class-wp-css-token-processor.php create mode 100644 tests/phpunit/tests/css-api/WpCssTokenProcessorTest.php create mode 100644 tests/phpunit/tests/css-api/WpCssTokenSanitizeTest.php create mode 100644 tests/phpunit/tests/css-api/WpCssTokenValidateTest.php diff --git a/docs/plans/2026-03-06-wp-css-token-processor-design.md b/docs/plans/2026-03-06-wp-css-token-processor-design.md new file mode 100644 index 0000000000000..2b3bac3410a45 --- /dev/null +++ b/docs/plans/2026-03-06-wp-css-token-processor-design.md @@ -0,0 +1,303 @@ +# Design: WP_CSS_Token_Processor + +**Date:** 2026-03-06 +**Status:** Approved +**Related:** https://github.com/WordPress/wordpress-develop/pull/11104, https://core.trac.wordpress.org/ticket/64771 + +--- + +## Background + +When a user without `unfiltered_html` (e.g. 
Author role, or site admins on some multisite configurations) saves a post containing block-level custom CSS (`attrs.style.css`) with `&` or `>` characters, the `filter_block_content()` pipeline corrupts the CSS through a three-step mangling chain: + +1. `parse_blocks()` / `json_decode()` — `\u0026` becomes `&` +2. `filter_block_kses_value()` / `wp_kses()` — `&` becomes `&amp;`, `>` becomes `&gt;` (KSES treats CSS as HTML) +3. `serialize_block_attributes()` / `json_encode()` — `&amp;` becomes `\u0026amp;` + +Each subsequent save compounds the corruption. The root cause is that `wp_kses()` is an HTML sanitizer being applied to CSS — the wrong tool for the job. This class is the right tool. + +--- + +## Scope + +### In scope (this session) + +- `WP_CSS_Token_Processor` class — streaming CSS tokenizer +- `sanitize()` instance method — strips unsafe tokens/rules, returns safe CSS string +- `validate()` instance method — returns `true|WP_Error` +- `get_updated_css()` instance method — reconstruct CSS after manual token modifications +- `get_removed_tokens()` instance method — inspection after `sanitize()` +- Low-level navigation and modification methods +- Full inline PHPDoc +- `README.md` in `src/wp-includes/css-api/` +- Full test suite + +### Out of scope (follow-on sessions) + +- Integration with `filter_block_kses_value()` in `blocks.php` +- `WP_CSS_Processor` — rule/declaration-aware layer (v2) +- Replacing `process_blocks_custom_css()` in `WP_Theme_JSON` +- CSS selector query engine (TODO in `class-wp-block.php:385`) +- Customizer CSS and Global Styles CSS pipeline adoption + +--- + +## Architecture + +### Directory structure + +``` +src/wp-includes/ +└── css-api/ + ├── class-wp-css-token-processor.php + └── README.md + +tests/phpunit/tests/ +└── css-api/ + ├── WpCssTokenProcessorTest.php + ├── WpCssTokenSanitizeTest.php + └── WpCssTokenValidateTest.php +``` + +### Component map + +``` +WP_CSS_Token_Processor — tokenizes a CSS string into a typed token stream + | + | 
sanitize(): string — strips unsafe tokens/rules, returns safe CSS + | validate(): true|WP_Error — returns true, or WP_Error with reason code + | get_updated_css(): string — reconstruct after manual token modifications +``` + +The integration point (`filter_block_kses_value()` dispatching to `sanitize()` for `['style','css']` paths) is a follow-on PR and is not part of this session. + +--- + +## `WP_CSS_Token_Processor` + +### Design principles + +- **Spec-inspired, safety-first** — follows the CSS Syntax Level 3 token vocabulary and structure, but prioritises correctness on security-relevant tokens over completeness. Gaps cause rejection/stripping rather than silent pass-through. +- **Forward-only streaming** — like `WP_HTML_Tag_Processor`, the processor advances a cursor through the input. No backtracking except via bookmarks (v2). +- **Non-destructive modification** — operates on the original string buffer and applies edits on output via `get_updated_css()`. +- **Instance-based API** — consistent with `WP_HTML_Tag_Processor`. Create an instance, call methods, retrieve output. 
+ +### Token types + +#### Security-critical (must be correct) + +| Constant | Examples | Notes | +|---|---|---| +| `WP_CSS_Token_Processor::URL_TOKEN` | `url(foo.png)` | Protocol-filtered against `wp_allowed_protocols()` | +| `WP_CSS_Token_Processor::BAD_URL_TOKEN` | `url(foo bar)` | Malformed URL — stripped | +| `WP_CSS_Token_Processor::STRING_TOKEN` | `"hello"`, `'world'` | Quoted strings | +| `WP_CSS_Token_Processor::BAD_STRING_TOKEN` | Unterminated string | Stripped | +| `WP_CSS_Token_Processor::AT_KEYWORD_TOKEN` | `@media`, `@import` | At-rule allowlist enforced in `sanitize()` | +| `WP_CSS_Token_Processor::OPEN_CURLY_TOKEN` | `{` | Block depth tracking | +| `WP_CSS_Token_Processor::CLOSE_CURLY_TOKEN` | `}` | Block depth tracking | + +#### Structurally important + +| Constant | Examples | +|---|---| +| `WP_CSS_Token_Processor::IDENT_TOKEN` | `color`, `red`, `sans-serif` | +| `WP_CSS_Token_Processor::FUNCTION_TOKEN` | `calc(`, `var(`, `rgb(` | +| `WP_CSS_Token_Processor::DELIM_TOKEN` | `&`, `>`, `+`, `~`, `*` | +| `WP_CSS_Token_Processor::DIMENSION_TOKEN` | `16px`, `1.5rem`, `100vh` | +| `WP_CSS_Token_Processor::PERCENTAGE_TOKEN` | `50%` | +| `WP_CSS_Token_Processor::NUMBER_TOKEN` | `42`, `1.5` | +| `WP_CSS_Token_Processor::HASH_TOKEN` | `#ff0000`, `#my-id` | +| `WP_CSS_Token_Processor::WHITESPACE_TOKEN` | Preserved in output | +| `WP_CSS_Token_Processor::SEMICOLON_TOKEN` | `;` | +| `WP_CSS_Token_Processor::COLON_TOKEN` | `:` | +| `WP_CSS_Token_Processor::COMMA_TOKEN` | `,` | + +#### Stripped unconditionally + +| Constant | Reason | +|---|---| +| `WP_CSS_Token_Processor::CDO_TOKEN` | `` — HTML comments have no place in CSS | +| Null bytes | Stripped in preprocessing, before tokenization | +| `next_token(): bool // Advance cursor. Returns false at EOF. +$processor->get_token_type(): string // Token type constant for current token. +$processor->get_token_value(): string // Raw value of current token. 
+$processor->get_block_depth(): int // Current { } nesting depth. +``` + +#### Low-level modification + +```php +$processor->set_token_value( string $value ): bool // Replace current token's value. +$processor->remove_token(): bool // Remove current token from output. +``` + +#### High-level consumers (primary public API) + +```php +$processor->sanitize(): string // Strip unsafe tokens/rules. Returns safe CSS string. +$processor->validate(): true|WP_Error // true if safe, WP_Error with code if not. +$processor->get_updated_css(): string // Reconstruct CSS after manual token modifications. +$processor->get_removed_tokens(): array // Log of what was stripped and why, after sanitize(). +``` + +--- + +## Security Policy + +### `sanitize()` — token-level rules + +Applied during tokenization, before structural analysis: + +| Condition | Action | +|---|---| +| ` p`, `& + span`) survive unchanged +- Child combinator (`>`) survives unchanged +- Valid at-rules (`@media`, `@supports`, `@keyframes`) survive unchanged +- Blocked at-rule (`@import`) is stripped entirely +- Unknown at-rule is stripped +- `url()` with allowed protocol survives +- `url()` with `javascript:` is stripped entirely +- `url()` with `data:` is stripped entirely +- `bad-url-token` is stripped +- `bad-string-token` is stripped +- ` p { margin: 0; }` survives unchanged + - Repeated saves do not compound corruption + +#### Validate tests (`WpCssTokenValidateTest.php`) + +- Valid CSS returns `true` +- Each blocked condition returns `WP_Error` with the correct error code +- `validate()` passing guarantees `sanitize()` is a no-op (tested over fixture set) + +--- + +## Open questions (deferred) + +- Should `get_removed_tokens()` be structured (array of `['token' => ..., 'reason' => ...]`) or flat? TBD during implementation. +- Should the at-rule allowlist be filterable via a WordPress filter hook (like `safe_style_css`)? Likely yes, deferred to implementation. 
+- Exact `@since` version tag — placeholder `X.X.0` during development. diff --git a/docs/plans/2026-03-06-wp-css-token-processor.md b/docs/plans/2026-03-06-wp-css-token-processor.md new file mode 100644 index 0000000000000..109d0e39a8934 --- /dev/null +++ b/docs/plans/2026-03-06-wp-css-token-processor.md @@ -0,0 +1,1881 @@ +# WP_CSS_Token_Processor Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Build `WP_CSS_Token_Processor` — a streaming CSS tokenizer with `sanitize()` and `validate()` consumers — that replaces `wp_kses()` for CSS block attributes, fixing compounding corruption of `&` and `>` in block custom CSS when saved by users without `unfiltered_html`. + +**Architecture:** A single class (`WP_CSS_Token_Processor`) lives in a new `src/wp-includes/css-api/` directory, loaded via `wp-settings.php`. It provides low-level token navigation and two high-level instance methods: `sanitize()` (returns a safe CSS string) and `validate()` (returns `true|WP_Error`). The security policy — at-rule allowlisting, URL protocol filtering, injection guarding — is encoded in those two methods, not in the tokenizer itself. + +**Tech Stack:** PHP 7.4+, PHPUnit, WordPress coding standards. No external dependencies. Follows `html-api/` conventions throughout. 
+ +**Design doc:** `docs/plans/2026-03-06-wp-css-token-processor-design.md` + +**Run tests with:** +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +--- + +## Task 1: Scaffold the css-api directory and register with wp-settings.php + +**Files:** +- Create: `src/wp-includes/css-api/class-wp-css-token-processor.php` +- Create: `src/wp-includes/css-api/README.md` +- Modify: `src/wp-settings.php` (around line 278, after html-api requires) + +**Step 1: Create the directory and stub class file** + +```php +sanitize(); + * + * ### Validate for REST API: + * + * $processor = new WP_CSS_Token_Processor( $css ); + * $result = $processor->validate(); // true or WP_Error + * + * ### Low-level token inspection: + * + * $processor = new WP_CSS_Token_Processor( $css ); + * while ( $processor->next_token() ) { + * if ( WP_CSS_Token_Processor::URL_TOKEN === $processor->get_token_type() ) { + * // inspect or modify + * } + * } + * $output = $processor->get_updated_css(); + * + * @since X.X.0 + */ +class WP_CSS_Token_Processor { + + // Token type constants. 
+ const IDENT_TOKEN = 'ident-token'; + const FUNCTION_TOKEN = 'function-token'; + const AT_KEYWORD_TOKEN = 'at-keyword-token'; + const HASH_TOKEN = 'hash-token'; + const STRING_TOKEN = 'string-token'; + const BAD_STRING_TOKEN = 'bad-string-token'; + const URL_TOKEN = 'url-token'; + const BAD_URL_TOKEN = 'bad-url-token'; + const DELIM_TOKEN = 'delim-token'; + const NUMBER_TOKEN = 'number-token'; + const PERCENTAGE_TOKEN = 'percentage-token'; + const DIMENSION_TOKEN = 'dimension-token'; + const WHITESPACE_TOKEN = 'whitespace-token'; + const CDO_TOKEN = 'CDO-token'; + const CDC_TOKEN = 'CDC-token'; + const COLON_TOKEN = 'colon-token'; + const SEMICOLON_TOKEN = 'semicolon-token'; + const COMMA_TOKEN = 'comma-token'; + const OPEN_SQUARE_TOKEN = '[-token'; + const CLOSE_SQUARE_TOKEN = ']-token'; + const OPEN_PAREN_TOKEN = '(-token'; + const CLOSE_PAREN_TOKEN = ')-token'; + const OPEN_CURLY_TOKEN = '{-token'; + const CLOSE_CURLY_TOKEN = '}-token'; + const EOF_TOKEN = 'EOF-token'; + + /** + * The original CSS input string. + * + * @since X.X.0 + * @var string + */ + private $css = ''; + + /** + * Placeholder — full implementation in subsequent tasks. + * + * @since X.X.0 + * + * @param string $css The CSS string to process. + */ + public function __construct( string $css ) { + $this->css = $css; + } +} +``` + +**Step 2: Create README.md** + +```markdown +# CSS API + +The CSS API provides tools for safely processing CSS strings in WordPress. + +## WP_CSS_Token_Processor + +A streaming, forward-only CSS tokenizer. Spec-inspired (CSS Syntax Level 3), +safety-first: unknown or unsupported constructs are stripped rather than +passed through silently. 
+ +### Primary use cases + +**Sanitize block-level custom CSS for storage:** + + $processor = new WP_CSS_Token_Processor( $css ); + $safe_css = $processor->sanitize(); + +**Validate CSS in REST API endpoints:** + + $processor = new WP_CSS_Token_Processor( $css ); + $result = $processor->validate(); // true or WP_Error + +### Security policy + +- At-rules outside the allowlist (`@media`, `@supports`, `@keyframes`, `@-webkit-keyframes`, `@layer`, `@container`, `@font-face`): stripped +- `url()` values with a disallowed scheme (e.g. `javascript:`, `data:`): stripped +- Malformed tokens (`bad-url-token`, `bad-string-token`): stripped +- HTML comment delimiters (`<!--`, `-->`): stripped +- Null bytes: stripped in preprocessing + +### Known gaps (v1) + +- Unicode range tokens (`U+`) are not supported. +- Surrogate pair edge cases beyond basic UTF-8 are not handled. + +### Spec reference + +CSS Syntax Level 3: https://www.w3.org/TR/css-syntax-3/ +``` + +**Step 3: Register in wp-settings.php** + +After line 278 (the last `html-api` require), add: + +```php +require ABSPATH . WPINC . '/css-api/class-wp-css-token-processor.php'; +``` + +**Step 4: Verify the class loads** + +```bash +php -r "require 'src/wp-load.php'; echo class_exists('WP_CSS_Token_Processor') ? 'OK' : 'FAIL';" +``` + +Expected: `OK` + +**Step 5: Commit** + +```bash +git add src/wp-includes/css-api/ src/wp-settings.php +git commit -m "CSS API: Scaffold WP_CSS_Token_Processor class and css-api directory" +``` + +--- + +## Task 2: Implement the tokenizer core — `next_token()`, `get_token_type()`, `get_token_value()` + +This is the heart of the class. Work token type by token type, test-first. 
+ +**Files:** +- Modify: `src/wp-includes/css-api/class-wp-css-token-processor.php` +- Create: `tests/phpunit/tests/css-api/WpCssTokenProcessorTest.php` + +**Step 1: Create the test file scaffold** + +```php +css = str_replace( "\0", '', $css ); + $this->length = strlen( $this->css ); +} +``` + +**Step 4: Write failing tests for whitespace and EOF tokens** + +```php +/** + * @covers ::next_token + * @covers ::get_token_type + */ +public function test_eof_on_empty_input() { + $p = new WP_CSS_Token_Processor( '' ); + $this->assertFalse( $p->next_token() ); +} + +public function test_whitespace_token() { + $p = new WP_CSS_Token_Processor( ' ' ); + $this->assertTrue( $p->next_token() ); + $this->assertSame( WP_CSS_Token_Processor::WHITESPACE_TOKEN, $p->get_token_type() ); + $this->assertSame( ' ', $p->get_token_value() ); + $this->assertFalse( $p->next_token() ); +} +``` + +**Step 5: Run — expect FAIL** + +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +Expected: FAIL — methods `next_token`, `get_token_type`, `get_token_value` do not exist. + +**Step 6: Implement `next_token()`, `get_token_type()`, `get_token_value()` with whitespace support** + +```php +/** + * Advances the processor to the next token. + * + * Returns true if a token was found, false at end of input. + * + * @since X.X.0 + * + * @return bool Whether a token was found. + */ +public function next_token(): bool { + if ( $this->at >= $this->length ) { + $this->token_type = self::EOF_TOKEN; + return false; + } + + $this->token_start = $this->at; + $c = $this->css[ $this->at ]; + + // Whitespace: space, tab, newline, carriage return, form feed. + if ( ' ' === $c || "\t" === $c || "\n" === $c || "\r" === $c || "\f" === $c ) { + $this->at += strspn( $this->css, " \t\n\r\f", $this->at ); + $this->token_length = $this->at - $this->token_start; + $this->token_type = self::WHITESPACE_TOKEN; + return true; + } + + // More token types added in subsequent steps. 
+ // Unknown character: advance one byte to avoid infinite loop. + ++$this->at; + $this->token_length = 1; + $this->token_type = self::DELIM_TOKEN; + return true; +} + +/** + * Returns the type of the current token. + * + * @since X.X.0 + * + * @return string|null Token type constant, or null if next_token() has not been called. + */ +public function get_token_type(): ?string { + return $this->token_type; +} + +/** + * Returns the raw value of the current token as it appears in the input. + * + * @since X.X.0 + * + * @return string|null Raw token value, or null if next_token() has not been called. + */ +public function get_token_value(): ?string { + if ( null === $this->token_start ) { + return null; + } + return substr( $this->css, $this->token_start, $this->token_length ); +} +``` + +**Step 7: Run — expect PASS for whitespace and EOF tests** + +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +Expected: PASS (2 tests) + +**Step 8: Add tests and implement the following token types one at a time, committing after each group passes:** + +For each group below: write the test, run to see it fail, implement the token, run to see it pass. 
+ +**Group A — Single-character punctuation tokens:** + +Tests: +```php +public function test_colon_token() { + $p = new WP_CSS_Token_Processor( ':' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::COLON_TOKEN, $p->get_token_type() ); +} + +public function test_semicolon_token() { + $p = new WP_CSS_Token_Processor( ';' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::SEMICOLON_TOKEN, $p->get_token_type() ); +} + +public function test_comma_token() { + $p = new WP_CSS_Token_Processor( ',' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::COMMA_TOKEN, $p->get_token_type() ); +} + +public function test_open_curly_token() { + $p = new WP_CSS_Token_Processor( '{' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::OPEN_CURLY_TOKEN, $p->get_token_type() ); +} + +public function test_close_curly_token() { + $p = new WP_CSS_Token_Processor( '}' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::CLOSE_CURLY_TOKEN, $p->get_token_type() ); +} + +public function test_open_paren_token() { + $p = new WP_CSS_Token_Processor( '(' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::OPEN_PAREN_TOKEN, $p->get_token_type() ); +} + +public function test_close_paren_token() { + $p = new WP_CSS_Token_Processor( ')' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::CLOSE_PAREN_TOKEN, $p->get_token_type() ); +} + +public function test_open_square_token() { + $p = new WP_CSS_Token_Processor( '[' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::OPEN_SQUARE_TOKEN, $p->get_token_type() ); +} + +public function test_close_square_token() { + $p = new WP_CSS_Token_Processor( ']' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::CLOSE_SQUARE_TOKEN, $p->get_token_type() ); +} +``` + +Implementation — add to `next_token()` before the "unknown character" fallback: +```php +if ( ':' === $c ) { ++$this->at; $this->token_length = 1; 
$this->token_type = self::COLON_TOKEN; return true; } +if ( ';' === $c ) { ++$this->at; $this->token_length = 1; $this->token_type = self::SEMICOLON_TOKEN; return true; } +if ( ',' === $c ) { ++$this->at; $this->token_length = 1; $this->token_type = self::COMMA_TOKEN; return true; } +if ( '{' === $c ) { ++$this->at; $this->token_length = 1; $this->token_type = self::OPEN_CURLY_TOKEN; ++$this->block_depth; return true; } +if ( '}' === $c ) { ++$this->at; $this->token_length = 1; $this->token_type = self::CLOSE_CURLY_TOKEN; if ( $this->block_depth > 0 ) { --$this->block_depth; } return true; } +if ( '(' === $c ) { ++$this->at; $this->token_length = 1; $this->token_type = self::OPEN_PAREN_TOKEN; return true; } +if ( ')' === $c ) { ++$this->at; $this->token_length = 1; $this->token_type = self::CLOSE_PAREN_TOKEN; return true; } +if ( '[' === $c ) { ++$this->at; $this->token_length = 1; $this->token_type = self::OPEN_SQUARE_TOKEN; return true; } +if ( ']' === $c ) { ++$this->at; $this->token_length = 1; $this->token_type = self::CLOSE_SQUARE_TOKEN; return true; } +``` + +**Group B — `ident-token` (identifiers: property names, keywords, selector parts)** + +CSS ident starts with `[a-zA-Z_-]` or `\` escape or non-ASCII. 
For v1, handle ASCII identifiers: + +Tests: +```php +public function test_ident_token_simple() { + $p = new WP_CSS_Token_Processor( 'color' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::IDENT_TOKEN, $p->get_token_type() ); + $this->assertSame( 'color', $p->get_token_value() ); +} + +public function test_ident_token_with_hyphen() { + $p = new WP_CSS_Token_Processor( 'background-color' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::IDENT_TOKEN, $p->get_token_type() ); + $this->assertSame( 'background-color', $p->get_token_value() ); +} + +public function test_ident_token_custom_property() { + $p = new WP_CSS_Token_Processor( '--my-var' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::IDENT_TOKEN, $p->get_token_type() ); + $this->assertSame( '--my-var', $p->get_token_value() ); +} +``` + +Implementation helper + token detection: +```php +/** + * Whether the character at a given offset can start an identifier. + * + * An identifier start character is a-z, A-Z, underscore, hyphen (when followed + * by another identifier char or another hyphen), or any non-ASCII character. + * + * @since X.X.0 + * + * @param int $offset Byte offset in the CSS string. + * @return bool + */ +private function is_ident_start( int $offset ): bool { + if ( $offset >= $this->length ) { + return false; + } + $c = $this->css[ $offset ]; + // Non-ASCII — treat as identifier character. + if ( ord( $c ) > 127 ) { + return true; + } + if ( ctype_alpha( $c ) || '_' === $c ) { + return true; + } + // Hyphen is valid if followed by another ident-start or another hyphen. + if ( '-' === $c ) { + $next = $offset + 1 < $this->length ? $this->css[ $offset + 1 ] : ''; + return '-' === $next || ctype_alpha( $next ) || '_' === $next || ( '' !== $next && ord( $next ) > 127 ); + } + return false; +} + +/** + * Consumes identifier characters from the current offset. + * + * Identifier characters are: a-z, A-Z, 0-9, hyphen, underscore, non-ASCII. 
+ * + * @since X.X.0 + */ +private function consume_ident_chars(): void { + while ( $this->at < $this->length ) { + $c = $this->css[ $this->at ]; + if ( ctype_alnum( $c ) || '-' === $c || '_' === $c || ord( $c ) > 127 ) { + ++$this->at; + } else { + break; + } + } +} +``` + +Add to `next_token()` before the fallback: +```php +if ( $this->is_ident_start( $this->at ) ) { + $this->consume_ident_chars(); + // If immediately followed by '(', it's a function token. + if ( $this->at < $this->length && '(' === $this->css[ $this->at ] ) { + ++$this->at; + $this->token_length = $this->at - $this->token_start; + $this->token_type = self::FUNCTION_TOKEN; + return true; + } + $this->token_length = $this->at - $this->token_start; + $this->token_type = self::IDENT_TOKEN; + return true; +} +``` + +**Group C — `at-keyword-token`** + +Tests: +```php +public function test_at_keyword_token_media() { + $p = new WP_CSS_Token_Processor( '@media' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::AT_KEYWORD_TOKEN, $p->get_token_type() ); + $this->assertSame( '@media', $p->get_token_value() ); +} + +public function test_at_keyword_token_import() { + $p = new WP_CSS_Token_Processor( '@import' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::AT_KEYWORD_TOKEN, $p->get_token_type() ); + $this->assertSame( '@import', $p->get_token_value() ); +} +``` + +Implementation — add to `next_token()`: +```php +if ( '@' === $c && $this->is_ident_start( $this->at + 1 ) ) { + ++$this->at; // consume '@' + $this->consume_ident_chars(); + $this->token_length = $this->at - $this->token_start; + $this->token_type = self::AT_KEYWORD_TOKEN; + return true; +} +``` + +**Group D — `hash-token`** + +Tests: +```php +public function test_hash_token_color() { + $p = new WP_CSS_Token_Processor( '#ff0000' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::HASH_TOKEN, $p->get_token_type() ); + $this->assertSame( '#ff0000', $p->get_token_value() ); +} +``` + 
+Implementation: +```php +if ( '#' === $c ) { + ++$this->at; // consume '#' + $this->consume_ident_chars(); + $this->token_length = $this->at - $this->token_start; + $this->token_type = self::HASH_TOKEN; + return true; +} +``` + +**Group E — numeric tokens: `number-token`, `dimension-token`, `percentage-token`** + +Tests: +```php +public function test_number_token_integer() { + $p = new WP_CSS_Token_Processor( '42' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::NUMBER_TOKEN, $p->get_token_type() ); + $this->assertSame( '42', $p->get_token_value() ); +} + +public function test_dimension_token() { + $p = new WP_CSS_Token_Processor( '16px' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::DIMENSION_TOKEN, $p->get_token_type() ); + $this->assertSame( '16px', $p->get_token_value() ); +} + +public function test_percentage_token() { + $p = new WP_CSS_Token_Processor( '50%' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::PERCENTAGE_TOKEN, $p->get_token_type() ); + $this->assertSame( '50%', $p->get_token_value() ); +} + +public function test_dimension_token_rem() { + $p = new WP_CSS_Token_Processor( '1.5rem' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::DIMENSION_TOKEN, $p->get_token_type() ); + $this->assertSame( '1.5rem', $p->get_token_value() ); +} +``` + +Implementation helper: +```php +/** + * Whether the character at a given offset starts a number. + * + * @since X.X.0 + * + * @param int $offset Byte offset. + * @return bool + */ +private function is_number_start( int $offset ): bool { + if ( $offset >= $this->length ) { + return false; + } + $c = $this->css[ $offset ]; + if ( ctype_digit( $c ) ) { + return true; + } + // +/- followed by digit or decimal point. + if ( ( '+' === $c || '-' === $c ) && $offset + 1 < $this->length ) { + $next = $this->css[ $offset + 1 ]; + return ctype_digit( $next ) || ( '.' 
=== $next && $offset + 2 < $this->length && ctype_digit( $this->css[ $offset + 2 ] ) ); + } + // Decimal point followed by digit. + if ( '.' === $c && $offset + 1 < $this->length ) { + return ctype_digit( $this->css[ $offset + 1 ] ); + } + return false; +} + +/** + * Consumes numeric characters (digits and at most one decimal point). + * + * @since X.X.0 + */ +private function consume_number(): void { + // Optional sign. + if ( $this->at < $this->length && ( '+' === $this->css[ $this->at ] || '-' === $this->css[ $this->at ] ) ) { + ++$this->at; + } + // Integer part. + while ( $this->at < $this->length && ctype_digit( $this->css[ $this->at ] ) ) { + ++$this->at; + } + // Optional decimal part. + if ( $this->at < $this->length && '.' === $this->css[ $this->at ] && $this->at + 1 < $this->length && ctype_digit( $this->css[ $this->at + 1 ] ) ) { + $this->at += 2; // consume '.' and first decimal digit + while ( $this->at < $this->length && ctype_digit( $this->css[ $this->at ] ) ) { + ++$this->at; + } + } +} +``` + +Add to `next_token()`: +```php +if ( $this->is_number_start( $this->at ) ) { + $this->consume_number(); + if ( $this->at < $this->length && '%' === $this->css[ $this->at ] ) { + ++$this->at; + $this->token_length = $this->at - $this->token_start; + $this->token_type = self::PERCENTAGE_TOKEN; + return true; + } + if ( $this->is_ident_start( $this->at ) ) { + $this->consume_ident_chars(); + $this->token_length = $this->at - $this->token_start; + $this->token_type = self::DIMENSION_TOKEN; + return true; + } + $this->token_length = $this->at - $this->token_start; + $this->token_type = self::NUMBER_TOKEN; + return true; +} +``` + +**Group F — `string-token` and `bad-string-token`** + +Tests: +```php +public function test_string_token_double_quoted() { + $p = new WP_CSS_Token_Processor( '"hello world"' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::STRING_TOKEN, $p->get_token_type() ); + $this->assertSame( '"hello world"', 
$p->get_token_value() ); +} + +public function test_string_token_single_quoted() { + $p = new WP_CSS_Token_Processor( "'hello'" ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::STRING_TOKEN, $p->get_token_type() ); +} + +public function test_bad_string_token_unterminated() { + // A newline inside a string (without escape) terminates it as bad-string. + $p = new WP_CSS_Token_Processor( "\"hello\nworld\"" ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::BAD_STRING_TOKEN, $p->get_token_type() ); +} +``` + +Implementation — add to `next_token()`: +```php +if ( '"' === $c || "'" === $c ) { + $quote = $c; + ++$this->at; + $is_bad = false; + while ( $this->at < $this->length ) { + $sc = $this->css[ $this->at ]; + if ( $sc === $quote ) { + ++$this->at; // consume closing quote + break; + } + if ( "\n" === $sc || "\r" === $sc || "\f" === $sc ) { + // Unescaped newline in a string — bad-string-token. + $is_bad = true; + break; + } + if ( '\\' === $sc ) { + // Escape sequence — consume both backslash and next char. + $this->at += 2; + continue; + } + ++$this->at; + } + $this->token_length = $this->at - $this->token_start; + $this->token_type = $is_bad ? self::BAD_STRING_TOKEN : self::STRING_TOKEN; + return true; +} +``` + +**Group G — `url-token` and `bad-url-token`** + +Note: `url(` is consumed as a `function-token` by the ident logic. But the CSS spec handles `url()` specially — no quotes around the URL value. We need to detect `url(` and consume it as a `url-token`. + +Adjust the function-token detection in the ident branch: +```php +// After consuming ident chars, check for function: +if ( $this->at < $this->length && '(' === $this->css[ $this->at ] ) { + $ident_value = strtolower( substr( $this->css, $this->token_start, $this->at - $this->token_start ) ); + ++$this->at; // consume '(' + + if ( 'url' === $ident_value ) { + // Consume optional whitespace. 
+ while ( $this->at < $this->length && in_array( $this->css[ $this->at ], array( ' ', "\t", "\n", "\r", "\f" ), true ) ) { + ++$this->at; + } + // If next char is a quote, fall through to function-token — url("...") handled as function. + if ( $this->at < $this->length && ( '"' === $this->css[ $this->at ] || "'" === $this->css[ $this->at ] ) ) { + $this->token_length = $this->at - $this->token_start; + $this->token_type = self::FUNCTION_TOKEN; + return true; + } + // Consume unquoted URL value. + $is_bad = false; + while ( $this->at < $this->length ) { + $uc = $this->css[ $this->at ]; + if ( ')' === $uc ) { + ++$this->at; + break; + } + if ( ' ' === $uc || "\t" === $uc || "\n" === $uc || "\r" === $uc || "\f" === $uc ) { + // Whitespace inside unquoted URL — skip, then expect ')'. + while ( $this->at < $this->length && in_array( $this->css[ $this->at ], array( ' ', "\t", "\n", "\r", "\f" ), true ) ) { + ++$this->at; + } + if ( $this->at < $this->length && ')' === $this->css[ $this->at ] ) { + ++$this->at; + } else { + $is_bad = true; + } + break; + } + if ( '"' === $uc || "'" === $uc || '(' === $uc ) { + // Invalid characters in unquoted URL. + $is_bad = true; + break; + } + ++$this->at; + } + $this->token_length = $this->at - $this->token_start; + $this->token_type = $is_bad ? self::BAD_URL_TOKEN : self::URL_TOKEN; + return true; + } + + $this->token_length = $this->at - $this->token_start; + $this->token_type = self::FUNCTION_TOKEN; + return true; +} +``` + +Tests: +```php +public function test_url_token_unquoted() { + $p = new WP_CSS_Token_Processor( 'url(foo.png)' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::URL_TOKEN, $p->get_token_type() ); + $this->assertSame( 'url(foo.png)', $p->get_token_value() ); +} + +public function test_url_token_with_quotes_is_function() { + // url("foo.png") is a function-token wrapping a string-token per the CSS spec. 
+ $p = new WP_CSS_Token_Processor( 'url("foo.png")' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::FUNCTION_TOKEN, $p->get_token_type() ); +} + +public function test_bad_url_token() { + $p = new WP_CSS_Token_Processor( "url(foo bar)" ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::BAD_URL_TOKEN, $p->get_token_type() ); +} +``` + +**Group H — `CDO-token` and `CDC-token`** + +Tests: +```php +public function test_cdo_token() { + $p = new WP_CSS_Token_Processor( '' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::CDC_TOKEN, $p->get_token_type() ); +} +``` + +Implementation — add to `next_token()` before other checks: +```php +// CDO: +if ( '-' === $c && $this->at + 2 < $this->length && '->' === substr( $this->css, $this->at + 1, 2 ) ) { + $this->at += 3; + $this->token_length = 3; + $this->token_type = self::CDC_TOKEN; + return true; +} +``` + +**Group I — `delim-token` (everything else: `&`, `>`, `+`, `~`, `*`, `!`, `.`, `/`, `<`, `^`, `|` etc.)** + +The existing fallback already handles this — single character consumed as `DELIM_TOKEN`. Add a test: +```php +public function test_delim_token_ampersand() { + $p = new WP_CSS_Token_Processor( '&' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::DELIM_TOKEN, $p->get_token_type() ); + $this->assertSame( '&', $p->get_token_value() ); +} + +public function test_delim_token_child_combinator() { + $p = new WP_CSS_Token_Processor( '>' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::DELIM_TOKEN, $p->get_token_type() ); + $this->assertSame( '>', $p->get_token_value() ); +} +``` + +**Step 9: Run all tokenizer tests** + +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +Expected: All pass. + +**Step 10: Add `get_block_depth()` and its test** + +```php +/** + * Returns the current block nesting depth. + * + * Block depth increases on `{` and decreases on `}`, never below 0. 
+ * + * @since X.X.0 + * + * @return int Current nesting depth. + */ +public function get_block_depth(): int { + return $this->block_depth; +} +``` + +Test: +```php +public function test_block_depth_tracking() { + $p = new WP_CSS_Token_Processor( '.a { .b { color: red; } }' ); + $this->assertSame( 0, $p->get_block_depth() ); + while ( $p->next_token() ) { + if ( WP_CSS_Token_Processor::OPEN_CURLY_TOKEN === $p->get_token_type() ) { + break; + } + } + $this->assertSame( 1, $p->get_block_depth() ); +} +``` + +**Step 11: Run all tests and commit** + +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +Expected: All pass. + +```bash +git add src/wp-includes/css-api/class-wp-css-token-processor.php tests/phpunit/tests/css-api/WpCssTokenProcessorTest.php +git commit -m "CSS API: Implement WP_CSS_Token_Processor tokenizer core with tests" +``` + +--- + +## Task 3: Implement `get_updated_css()`, `remove_token()`, `set_token_value()` + +**Files:** +- Modify: `src/wp-includes/css-api/class-wp-css-token-processor.php` +- Modify: `tests/phpunit/tests/css-api/WpCssTokenProcessorTest.php` + +**Step 1: Write failing tests** + +```php +public function test_get_updated_css_unchanged_when_no_modifications() { + $css = 'color: red;'; + $p = new WP_CSS_Token_Processor( $css ); + while ( $p->next_token() ) {} + $this->assertSame( $css, $p->get_updated_css() ); +} + +public function test_remove_token_removes_it_from_output() { + $p = new WP_CSS_Token_Processor( 'color: red;' ); + while ( $p->next_token() ) { + if ( WP_CSS_Token_Processor::IDENT_TOKEN === $p->get_token_type() && 'red' === $p->get_token_value() ) { + $p->remove_token(); + } + } + $this->assertSame( 'color: ;', $p->get_updated_css() ); +} + +public function test_set_token_value_replaces_value_in_output() { + $p = new WP_CSS_Token_Processor( 'color: red;' ); + while ( $p->next_token() ) { + if ( WP_CSS_Token_Processor::IDENT_TOKEN === $p->get_token_type() && 'red' === $p->get_token_value() ) { + 
$p->set_token_value( 'blue' ); + } + } + $this->assertSame( 'color: blue;', $p->get_updated_css() ); +} +``` + +**Step 2: Run — expect FAIL** + +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +**Step 3: Add a replacements log property and implement the three methods** + +Add property: +```php +/** + * Pending replacements to apply on get_updated_css(). + * + * Each entry: [ 'start' => int, 'length' => int, 'replacement' => string ] + * + * @since X.X.0 + * @var array + */ +private $replacements = array(); +``` + +Implement methods: +```php +/** + * Removes the current token from the CSS output. + * + * Has no effect if next_token() has not been called. + * + * @since X.X.0 + * + * @return bool Whether the removal was recorded. + */ +public function remove_token(): bool { + if ( null === $this->token_start ) { + return false; + } + $this->replacements[] = array( + 'start' => $this->token_start, + 'length' => $this->token_length, + 'replacement' => '', + ); + return true; +} + +/** + * Replaces the current token's value in the CSS output. + * + * For most token types this replaces the entire raw token text. + * Has no effect if next_token() has not been called. + * + * @since X.X.0 + * + * @param string $value Replacement text. + * @return bool Whether the replacement was recorded. + */ +public function set_token_value( string $value ): bool { + if ( null === $this->token_start ) { + return false; + } + $this->replacements[] = array( + 'start' => $this->token_start, + 'length' => $this->token_length, + 'replacement' => $value, + ); + return true; +} + +/** + * Returns the CSS string with all recorded modifications applied. + * + * Modifications are applied in reverse byte order so that earlier + * offsets remain valid as later replacements are made. + * + * @since X.X.0 + * + * @return string The modified CSS string. 
+ */ +public function get_updated_css(): string { + if ( empty( $this->replacements ) ) { + return $this->css; + } + + // Sort by start offset descending so we apply from end to start, + // keeping earlier offsets valid. + $sorted = $this->replacements; + usort( $sorted, static function ( $a, $b ) { + return $b['start'] - $a['start']; + } ); + + $output = $this->css; + foreach ( $sorted as $replacement ) { + $output = substr_replace( $output, $replacement['replacement'], $replacement['start'], $replacement['length'] ); + } + return $output; +} +``` + +**Step 4: Run — expect PASS** + +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +**Step 5: Commit** + +```bash +git add src/wp-includes/css-api/class-wp-css-token-processor.php tests/phpunit/tests/css-api/WpCssTokenProcessorTest.php +git commit -m "CSS API: Add get_updated_css(), remove_token(), set_token_value() with tests" +``` + +--- + +## Task 4: Implement `sanitize()` + +**Files:** +- Modify: `src/wp-includes/css-api/class-wp-css-token-processor.php` +- Create: `tests/phpunit/tests/css-api/WpCssTokenSanitizeTest.php` + +**Step 1: Create test file scaffold** + +```php +sanitize(); + } +} +``` + +**Step 2: Write all failing tests** + +```php +// --- Injection guard --- + +public function test_style_close_tag_returns_empty_string() { + $this->assertSame( '', $this->sanitize( 'color: red; .evil {}' ) ); +} + +public function test_partial_style_close_tag_returns_empty_string() { + $this->assertSame( '', $this->sanitize( 'color: red; assertSame( 'color: red;', $this->sanitize( "color\0: red;" ) ); +} + +// --- CSS nesting selectors survive (the PR #11104 regression cases) --- + +public function test_css_nesting_ampersand_survives() { + $css = 'color: blue; & p { color: red; }'; + $this->assertSame( $css, $this->sanitize( $css ) ); +} + +public function test_child_combinator_survives() { + $css = '& > p { margin: 0; }'; + $this->assertSame( $css, $this->sanitize( $css ) ); +} + +public function 
test_adjacent_sibling_combinator_survives() { + $css = '& + span { color: green; }'; + $this->assertSame( $css, $this->sanitize( $css ) ); +} + +// --- CDO/CDC stripped --- + +public function test_cdo_token_stripped() { + $this->assertSame( 'color: red;', $this->sanitize( 'color: red;' ) ); +} + +// --- bad-string-token stripped --- + +public function test_bad_string_token_stripped() { + // A string containing an unescaped newline is a bad-string-token. + $this->assertSame( 'content: ;', $this->sanitize( "content: \"bad\nstring\";" ) ); +} + +// --- bad-url-token stripped --- + +public function test_bad_url_token_stripped() { + $this->assertSame( 'background-image: ;', $this->sanitize( 'background-image: url(bad url);' ) ); +} + +// --- URL protocol filtering --- + +public function test_url_with_javascript_protocol_stripped() { + $this->assertSame( 'background: ;', $this->sanitize( 'background: url(javascript:alert(1));' ) ); +} + +public function test_url_with_data_protocol_stripped() { + $this->assertSame( 'background: ;', $this->sanitize( 'background: url(data:image/png;base64,abc);' ) ); +} + +public function test_url_with_https_survives() { + $css = 'background: url(https://example.com/image.png);'; + $this->assertSame( $css, $this->sanitize( $css ) ); +} + +public function test_url_with_relative_path_survives() { + $css = 'background: url(image.png);'; + $this->assertSame( $css, $this->sanitize( $css ) ); +} + +// --- At-rule allowlist --- + +public function test_allowed_at_rule_media_survives() { + $css = '@media (max-width: 768px) { color: red; }'; + $this->assertSame( $css, $this->sanitize( $css ) ); +} + +public function test_allowed_at_rule_supports_survives() { + $css = '@supports (display: grid) { color: red; }'; + $this->assertSame( $css, $this->sanitize( $css ) ); +} + +public function test_blocked_at_rule_import_stripped() { + $result = $this->sanitize( "@import url('https://evil.com/style.css'); color: red;" ); + 
$this->assertStringNotContainsString( '@import', $result ); + $this->assertStringContainsString( 'color: red;', $result ); +} + +public function test_blocked_at_rule_charset_stripped() { + $result = $this->sanitize( '@charset "UTF-8"; color: red;' ); + $this->assertStringNotContainsString( '@charset', $result ); +} + +public function test_unknown_at_rule_stripped() { + $result = $this->sanitize( '@unknown-future-rule { color: red; } .a { color: blue; }' ); + $this->assertStringNotContainsString( '@unknown-future-rule', $result ); + $this->assertStringContainsString( 'color: blue;', $result ); +} + +// --- Idempotency --- + +/** + * @dataProvider data_idempotency_fixtures + */ +public function test_sanitize_is_idempotent( string $css ) { + $once = $this->sanitize( $css ); + $twice = $this->sanitize( $once ); + $this->assertSame( $once, $twice, 'sanitize() must be idempotent' ); +} + +public function data_idempotency_fixtures(): array { + return array( + 'simple declaration' => array( 'color: red;' ), + 'nesting ampersand' => array( 'color: blue; & p { color: red; }' ), + 'child combinator' => array( '& > p { margin: 0; }' ), + 'media query' => array( '@media (max-width: 768px) { color: red; }' ), + 'custom property' => array( '--my-color: #ff0000;' ), + 'multiple declarations' => array( 'color: red; font-size: 16px; margin: 0;' ), + 'var() usage' => array( 'color: var(--my-color);' ), + 'already sanitized import' => array( 'color: blue;' ), + ); +} + +// --- get_removed_tokens() --- + +public function test_get_removed_tokens_empty_when_nothing_stripped() { + $p = new WP_CSS_Token_Processor( 'color: red;' ); + $p->sanitize(); + $this->assertEmpty( $p->get_removed_tokens() ); +} + +public function test_get_removed_tokens_populated_after_strip() { + $p = new WP_CSS_Token_Processor( 'background: url(javascript:alert(1));' ); + $p->sanitize(); + $removed = $p->get_removed_tokens(); + $this->assertNotEmpty( $removed ); + $this->assertArrayHasKey( 'token', $removed[0] ); + 
$this->assertArrayHasKey( 'reason', $removed[0] ); +} +``` + +**Step 3: Run — expect FAIL** + +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +**Step 4: Implement `sanitize()` and `get_removed_tokens()`** + +Add property: +```php +/** + * Log of tokens removed during sanitize(). + * + * Each entry: [ 'token' => string, 'reason' => string ] + * + * @since X.X.0 + * @var array + */ +private $removed_tokens = array(); +``` + +Add constants for the allowed at-rule list: +```php +/** + * At-rule keywords that are allowed in block custom CSS. + * + * @since X.X.0 + * @var string[] + */ +const ALLOWED_AT_RULES = array( + 'media', + 'supports', + 'keyframes', + '-webkit-keyframes', + 'layer', + 'container', + 'font-face', +); +``` + +Implement the methods: +```php +/** + * Returns the list of tokens removed during the last sanitize() call. + * + * Each entry contains: + * - 'token' string The raw token text that was removed. + * - 'reason' string A short description of why it was removed. + * + * @since X.X.0 + * + * @return array Array of removal log entries. + */ +public function get_removed_tokens(): array { + return $this->removed_tokens; +} + +/** + * Sanitizes the CSS string, stripping unsafe tokens and rules. + * + * Applies the following security policy: + * - Returns '' immediately if `) tokens. + * - Strips url() tokens with javascript: or data: protocols. + * - Strips url() tokens with any protocol not in wp_allowed_protocols(). + * - Strips @import, @charset, @namespace, and unknown at-rules (with their blocks). + * - Strip granularity: bad token → remove token; bad at-rule → remove entire rule. + * + * Idempotency guarantee: sanitize( sanitize( $css ) ) === sanitize( $css ). + * + * @since X.X.0 + * + * @return string The sanitized CSS string. 
+ */ +public function sanitize(): string { + // Injection guard — if css, 'removed_tokens = array(); + $this->replacements = array(); + $this->reset(); + + $allowed_protocols = wp_allowed_protocols(); + + while ( $this->next_token() ) { + $type = $this->get_token_type(); + $value = $this->get_token_value(); + + // Strip HTML comment tokens — these have no valid use in CSS. + if ( self::CDO_TOKEN === $type || self::CDC_TOKEN === $type ) { + $this->removed_tokens[] = array( 'token' => $value, 'reason' => 'html_comment' ); + $this->remove_token(); + continue; + } + + // Strip malformed tokens. + if ( self::BAD_STRING_TOKEN === $type ) { + $this->removed_tokens[] = array( 'token' => $value, 'reason' => 'bad_string' ); + $this->remove_token(); + continue; + } + if ( self::BAD_URL_TOKEN === $type ) { + $this->removed_tokens[] = array( 'token' => $value, 'reason' => 'bad_url' ); + $this->remove_token(); + continue; + } + + // URL protocol filtering. + if ( self::URL_TOKEN === $type ) { + // Extract the URL from url(...). + $url = preg_replace( '/^url\(\s*["\']?|["\']?\s*\)$/i', '', $value ); + $url = trim( $url ); + + $scheme = strtolower( (string) parse_url( $url, PHP_URL_SCHEME ) ); + if ( 'javascript' === $scheme || 'data' === $scheme ) { + // Always strip javascript: and data: — high risk, no legitimate use in block CSS. + $this->removed_tokens[] = array( 'token' => $value, 'reason' => 'unsafe_url_protocol' ); + $this->remove_token(); + continue; + } + if ( '' !== $scheme && ! in_array( $scheme, $allowed_protocols, true ) ) { + $this->removed_tokens[] = array( 'token' => $value, 'reason' => 'disallowed_url_protocol' ); + $this->remove_token(); + continue; + } + } + + // At-rule allowlist enforcement. + if ( self::AT_KEYWORD_TOKEN === $type ) { + // The token value includes '@', e.g. '@media' — strip the '@' for comparison. + $keyword = strtolower( ltrim( $value, '@' ) ); + + if ( ! 
in_array( $keyword, self::ALLOWED_AT_RULES, true ) ) { + // Strip the at-rule keyword and its entire following block (if any). + $this->removed_tokens[] = array( 'token' => $value, 'reason' => 'disallowed_at_rule' ); + $this->remove_token(); + // Consume and remove the rule's block or up to the next semicolon. + $this->consume_and_remove_rule_block(); + } + } + } + + return $this->get_updated_css(); +} + +/** + * Consumes and removes the block or semicolon-terminated tail of an at-rule. + * + * Called immediately after removing an at-rule keyword token. Advances + * the cursor and removes tokens until the at-rule ends — either at a + * top-level ';' (for statement at-rules like @import) or after a + * balanced '{ ... }' block (for block at-rules like @media). + * + * @since X.X.0 + */ +private function consume_and_remove_rule_block(): void { + $depth = 0; + while ( $this->next_token() ) { + $type = $this->get_token_type(); + $this->remove_token(); + + if ( self::OPEN_CURLY_TOKEN === $type ) { + ++$depth; + } elseif ( self::CLOSE_CURLY_TOKEN === $type ) { + --$depth; + if ( $depth <= 0 ) { + break; + } + } elseif ( self::SEMICOLON_TOKEN === $type && 0 === $depth ) { + break; + } elseif ( self::EOF_TOKEN === $type ) { + break; + } + } +} + +/** + * Resets the processor cursor to the beginning of the input. + * + * Called at the start of sanitize() and validate() to allow the + * same instance to be used cleanly for one operation. 
+ * + * @since X.X.0 + */ +private function reset(): void { + $this->at = 0; + $this->token_start = null; + $this->token_length = 0; + $this->token_type = null; + $this->block_depth = 0; + $this->replacements = array(); +} +``` + +**Step 5: Run — expect PASS** + +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +**Step 6: Commit** + +```bash +git add src/wp-includes/css-api/class-wp-css-token-processor.php tests/phpunit/tests/css-api/WpCssTokenSanitizeTest.php +git commit -m "CSS API: Implement sanitize() with security policy and tests" +``` + +--- + +## Task 5: Implement `validate()` + +**Files:** +- Modify: `src/wp-includes/css-api/class-wp-css-token-processor.php` +- Create: `tests/phpunit/tests/css-api/WpCssTokenValidateTest.php` + +**Step 1: Create test file and write failing tests** + +```php +validate(); + } + + // --- Returns true for safe CSS --- + + public function test_valid_simple_css_returns_true() { + $this->assertTrue( $this->validate( 'color: red;' ) ); + } + + public function test_valid_nested_css_returns_true() { + $this->assertTrue( $this->validate( 'color: blue; & p { color: red; }' ) ); + } + + public function test_valid_media_query_returns_true() { + $this->assertTrue( $this->validate( '@media (max-width: 768px) { color: red; }' ) ); + } + + // --- Returns WP_Error for each blocked condition --- + + public function test_style_close_tag_returns_wp_error() { + $result = $this->validate( 'color: red; ' ); + $this->assertWPError( $result ); + $this->assertSame( 'css_injection', $result->get_error_code() ); + } + + public function test_bad_string_token_returns_wp_error() { + $result = $this->validate( "content: \"bad\nstring\";" ); + $this->assertWPError( $result ); + $this->assertSame( 'css_malformed_token', $result->get_error_code() ); + } + + public function test_bad_url_token_returns_wp_error() { + $result = $this->validate( 'background: url(bad url);' ); + $this->assertWPError( $result ); + $this->assertSame( 
'css_malformed_token', $result->get_error_code() ); + } + + public function test_javascript_url_returns_wp_error() { + $result = $this->validate( 'background: url(javascript:alert(1));' ); + $this->assertWPError( $result ); + $this->assertSame( 'css_unsafe_url', $result->get_error_code() ); + } + + public function test_data_url_returns_wp_error() { + $result = $this->validate( 'background: url(data:image/png;base64,abc);' ); + $this->assertWPError( $result ); + $this->assertSame( 'css_unsafe_url', $result->get_error_code() ); + } + + public function test_blocked_at_rule_returns_wp_error() { + $result = $this->validate( "@import url('https://evil.com/style.css');" ); + $this->assertWPError( $result ); + $this->assertSame( 'css_disallowed_at_rule', $result->get_error_code() ); + } + + public function test_unknown_at_rule_returns_wp_error() { + $result = $this->validate( '@unknown-rule { color: red; }' ); + $this->assertWPError( $result ); + $this->assertSame( 'css_disallowed_at_rule', $result->get_error_code() ); + } + + public function test_cdo_token_returns_wp_error() { + $result = $this->validate( ' color: red;' ); + $this->assertWPError( $result ); + $this->assertSame( 'css_html_comment', $result->get_error_code() ); + } + + // --- validate() passing guarantees sanitize() is a no-op --- + + /** + * @dataProvider data_valid_css_fixtures + */ + public function test_validate_passing_means_sanitize_is_noop( string $css ) { + $p = new WP_CSS_Token_Processor( $css ); + $validation = $p->validate(); + if ( true !== $validation ) { + $this->markTestSkipped( 'CSS is not valid — skipping no-op check.' ); + } + $sanitized = ( new WP_CSS_Token_Processor( $css ) )->sanitize(); + $this->assertSame( $css, $sanitized, 'If validate() returns true, sanitize() must be a no-op.' 
); + } + + public function data_valid_css_fixtures(): array { + return array( + array( 'color: red;' ), + array( 'color: blue; & p { color: red; }' ), + array( '& > p { margin: 0; }' ), + array( '@media (max-width: 768px) { color: red; }' ), + array( '--my-color: #ff0000;' ), + array( 'color: var(--my-color);' ), + array( 'background: url(https://example.com/image.png);' ), + ); + } +} +``` + +**Step 2: Run — expect FAIL** + +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +**Step 3: Implement `validate()`** + +```php +/** + * Validates that the CSS string is safe to store and output. + * + * Returns true if the CSS would survive sanitize() unchanged. + * Returns a WP_Error if any unsafe construct is detected. + * + * Unlike sanitize(), this method does not modify the CSS. It returns + * on the first violation found. + * + * Error codes: + * - 'css_injection' — ) token + * + * @since X.X.0 + * + * @return true|WP_Error True if the CSS is valid, WP_Error otherwise. + */ +public function validate() { + // Injection guard. + if ( false !== stripos( $this->css, 'reset(); + $allowed_protocols = wp_allowed_protocols(); + + while ( $this->next_token() ) { + $type = $this->get_token_type(); + $value = $this->get_token_value(); + + if ( self::CDO_TOKEN === $type || self::CDC_TOKEN === $type ) { + return new WP_Error( 'css_html_comment', __( 'CSS must not contain HTML comment tokens.' ) ); + } + + if ( self::BAD_STRING_TOKEN === $type || self::BAD_URL_TOKEN === $type ) { + return new WP_Error( 'css_malformed_token', __( 'CSS contains a malformed string or URL token.' ) ); + } + + if ( self::URL_TOKEN === $type ) { + $url = preg_replace( '/^url\(\s*["\']?|["\']?\s*\)$/i', '', $value ); + $url = trim( $url ); + $scheme = strtolower( (string) parse_url( $url, PHP_URL_SCHEME ) ); + + if ( 'javascript' === $scheme || 'data' === $scheme ) { + return new WP_Error( 'css_unsafe_url', __( 'CSS contains a URL with an unsafe protocol.' 
) ); + } + if ( '' !== $scheme && ! in_array( $scheme, $allowed_protocols, true ) ) { + return new WP_Error( 'css_unsafe_url', __( 'CSS contains a URL with a disallowed protocol.' ) ); + } + } + + if ( self::AT_KEYWORD_TOKEN === $type ) { + $keyword = strtolower( ltrim( $value, '@' ) ); + if ( ! in_array( $keyword, self::ALLOWED_AT_RULES, true ) ) { + return new WP_Error( 'css_disallowed_at_rule', __( 'CSS contains a disallowed at-rule.' ) ); + } + } + } + + return true; +} +``` + +**Step 4: Run — expect PASS** + +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +**Step 5: Commit** + +```bash +git add src/wp-includes/css-api/class-wp-css-token-processor.php tests/phpunit/tests/css-api/WpCssTokenValidateTest.php +git commit -m "CSS API: Implement validate() with WP_Error codes and tests" +``` + +--- + +## Task 6: Final review, edge case tests, and documentation pass + +**Files:** +- Modify: `src/wp-includes/css-api/class-wp-css-token-processor.php` (docblock polish) +- Modify: `tests/phpunit/tests/css-api/WpCssTokenProcessorTest.php` (edge cases) +- Modify: `tests/phpunit/tests/css-api/WpCssTokenSanitizeTest.php` (edge cases) + +**Step 1: Add edge case tests for known tricky inputs** + +Add to `WpCssTokenProcessorTest.php`: +```php +public function test_empty_input_returns_no_tokens() { + $p = new WP_CSS_Token_Processor( '' ); + $this->assertFalse( $p->next_token() ); +} + +public function test_whitespace_only_input() { + $p = new WP_CSS_Token_Processor( ' ' ); + $this->assertTrue( $p->next_token() ); + $this->assertSame( WP_CSS_Token_Processor::WHITESPACE_TOKEN, $p->get_token_type() ); + $this->assertFalse( $p->next_token() ); +} + +public function test_function_token_calc() { + $p = new WP_CSS_Token_Processor( 'calc(' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::FUNCTION_TOKEN, $p->get_token_type() ); + $this->assertSame( 'calc(', $p->get_token_value() ); +} + +public function test_sequence_of_tokens_in_declaration() { + $p 
= new WP_CSS_Token_Processor( 'color: red;' ); + $tokens = array(); + while ( $p->next_token() ) { + $tokens[] = $p->get_token_type(); + } + $this->assertContains( WP_CSS_Token_Processor::IDENT_TOKEN, $tokens ); + $this->assertContains( WP_CSS_Token_Processor::COLON_TOKEN, $tokens ); + $this->assertContains( WP_CSS_Token_Processor::SEMICOLON_TOKEN, $tokens ); +} +``` + +Add to `WpCssTokenSanitizeTest.php`: +```php +public function test_empty_input_returns_empty_string() { + $this->assertSame( '', $this->sanitize( '' ) ); +} + +public function test_custom_properties_survive() { + $css = '--my-color: #ff0000;'; + $this->assertSame( $css, $this->sanitize( $css ) ); +} + +public function test_var_function_survives() { + $css = 'color: var(--my-color);'; + $this->assertSame( $css, $this->sanitize( $css ) ); +} + +public function test_keyframes_survives() { + $css = '@keyframes slide { from { opacity: 0; } to { opacity: 1; } }'; + $this->assertSame( $css, $this->sanitize( $css ) ); +} + +public function test_multiple_blocked_at_rules_all_stripped() { + $result = $this->sanitize( "@import 'evil.css'; @charset 'UTF-8'; color: red;" ); + $this->assertStringNotContainsString( '@import', $result ); + $this->assertStringNotContainsString( '@charset', $result ); + $this->assertStringContainsString( 'color: red;', $result ); +} + +// The specific compounding corruption scenario from PR #11104. +public function test_pr_11104_regression_repeated_saves_do_not_corrupt() { + $original = 'color: blue; & p { color: red; }'; + $after_save_1 = $this->sanitize( $original ); + $after_save_2 = $this->sanitize( $after_save_1 ); + $after_save_3 = $this->sanitize( $after_save_2 ); + $this->assertSame( $original, $after_save_1 ); + $this->assertSame( $original, $after_save_2 ); + $this->assertSame( $original, $after_save_3 ); +} +``` + +**Step 2: Run full test suite** + +```bash +php vendor/phpunit/phpunit/phpunit --group css-api +``` + +Expected: All pass. 
+ +**Step 3: Review class docblock** + +Open `src/wp-includes/css-api/class-wp-css-token-processor.php`. Verify: +- Class docblock explains purpose, non-goals, spec reference, usage examples, known gaps +- Every public method has `@since`, `@param`, `@return` +- Every security decision in `sanitize()` and `validate()` has a comment explaining *why* + +**Step 4: Final commit** + +```bash +git add src/wp-includes/css-api/class-wp-css-token-processor.php tests/phpunit/tests/css-api/ +git commit -m "CSS API: Add edge case tests and final documentation pass for WP_CSS_Token_Processor" +``` + +--- + +## Completion checklist + +- [ ] `src/wp-includes/css-api/class-wp-css-token-processor.php` exists and loads via `wp-settings.php` +- [ ] `src/wp-includes/css-api/README.md` exists +- [ ] All token types from the design doc are implemented and tested +- [ ] `sanitize()` passes all fixture tests including PR #11104 regression cases +- [ ] `sanitize()` is idempotent (tested over fixture set) +- [ ] `validate()` returns `true` for safe CSS and `WP_Error` with correct codes for each violation +- [ ] `validate()` passing guarantees `sanitize()` is a no-op (tested) +- [ ] `get_removed_tokens()` is populated correctly after `sanitize()` +- [ ] All public methods have full PHPDoc +- [ ] All tests use `@group css-api` +- [ ] `php vendor/phpunit/phpunit/phpunit --group css-api` passes with no failures diff --git a/src/wp-includes/css-api/README.md b/src/wp-includes/css-api/README.md new file mode 100644 index 0000000000000..4d6dfc0bd0a36 --- /dev/null +++ b/src/wp-includes/css-api/README.md @@ -0,0 +1,72 @@ +# CSS API + +The CSS API provides tools for safely processing CSS strings in WordPress. + +## WP_CSS_Token_Processor + +A streaming, forward-only CSS tokenizer. Spec-inspired (CSS Syntax Level 3), +safety-first: unknown or unsupported constructs are stripped rather than +passed through silently. 
+ +### Primary use cases + +**Sanitize block-level custom CSS for storage:** + + $processor = new WP_CSS_Token_Processor( $css ); + $safe_css = $processor->sanitize(); + +**Validate CSS in REST API endpoints:** + + $processor = new WP_CSS_Token_Processor( $css ); + $result = $processor->validate(); // true or WP_Error + +### Security policy + +- `</style` anywhere in the input: entire CSS rejected (`sanitize()` returns `''`) +- `url()` with `javascript:` or `data:` scheme, or a scheme not in `wp_allowed_protocols()`: stripped +- `bad-string-token` and `bad-url-token`: stripped +- At-rules not in the allowed list (e.g. `@import`, `@charset`, `@namespace`): stripped together with their block +- HTML comment tokens (`<!--`, `-->`): stripped +- Null bytes: stripped in preprocessing + +### Token type reference + +Token type constants are defined as class constants on `WP_CSS_Token_Processor`: +`IDENT_TOKEN`, `FUNCTION_TOKEN`, `AT_KEYWORD_TOKEN`, `HASH_TOKEN`, `STRING_TOKEN`, +`BAD_STRING_TOKEN`, `URL_TOKEN`, `BAD_URL_TOKEN`, `DELIM_TOKEN`, `NUMBER_TOKEN`, +`PERCENTAGE_TOKEN`, `DIMENSION_TOKEN`, `WHITESPACE_TOKEN`, `CDO_TOKEN`, `CDC_TOKEN`, +`COLON_TOKEN`, `SEMICOLON_TOKEN`, `COMMA_TOKEN`, `OPEN_SQUARE_TOKEN`, +`CLOSE_SQUARE_TOKEN`, `OPEN_PAREN_TOKEN`, `CLOSE_PAREN_TOKEN`, `OPEN_CURLY_TOKEN`, +`CLOSE_CURLY_TOKEN`, `EOF_TOKEN`. + +### validate() error codes + +| Code | Condition | +|---------------------------|--------------------------------------------------------| +| `css_injection` | `</style` found anywhere in the input | +| `css_html_comment` | HTML comment token (`<!--` or `-->`) | +| `css_malformed_token` | `bad-string-token` or `bad-url-token` | +| `css_unsafe_url` | `url()` with `javascript:` or `data:` scheme, or a scheme not in `wp_allowed_protocols()` | +| `css_disallowed_at_rule` | At-rule keyword not in the allowed list | + +If `validate()` returns `true`, calling `sanitize()` on the same input is guaranteed +to be a no-op (the input is returned unchanged). + +### Known gaps (v1) + +- Unicode range tokens (`U+`) are not supported. +- Surrogate pair edge cases beyond basic UTF-8 are not handled. +- CSS escape sequences (`\XX` or `\<char>`) in identifiers are not supported; + a backslash is emitted as `DELIM_TOKEN`. +- `url("javascript:...")` with a quoted string argument is not flagged by the URL + protocol check — it tokenizes as `FUNCTION_TOKEN` + `STRING_TOKEN`, not as + `URL_TOKEN`. 
This is not a practical security concern (browsers do not execute + `javascript:` URLs in CSS resource-fetch contexts) but means `validate()` does + not reject quoted `javascript:` in `url()`. +- CSS block comments (`/* ... */`) are not tokenized as a unit and their content + passes through `sanitize()` unchanged. + +### Spec reference + +CSS Syntax Level 3: https://www.w3.org/TR/css-syntax-3/ diff --git a/src/wp-includes/css-api/class-wp-css-token-processor.php b/src/wp-includes/css-api/class-wp-css-token-processor.php new file mode 100644 index 0000000000000..482f640bd2dc4 --- /dev/null +++ b/src/wp-includes/css-api/class-wp-css-token-processor.php @@ -0,0 +1,1397 @@ +`) in identifiers are not supported; + * a backslash is emitted as a DELIM_TOKEN rather than starting an escaped ident. + * - `url("javascript:...")` with a quoted string argument tokenizes as + * FUNCTION_TOKEN + STRING_TOKEN (not URL_TOKEN), so the URL protocol check in + * sanitize() and validate() does not fire for quoted javascript: in url(). + * - CSS block comments (slash-star ... star-slash) are not tokenized as a unit; the + * comment delimiters and body are emitted as individual DELIM_TOKEN, WHITESPACE_TOKEN, + * and IDENT_TOKEN tokens. This means comment content passes through sanitize() + * unchanged. Stripping all comments was not implemented to avoid destroying + * intentional author comments in stored CSS. 
+ * + * ## Usage + * + * ### Sanitize for storage (KSES pipeline): + * + * $processor = new WP_CSS_Token_Processor( $css ); + * $safe_css = $processor->sanitize(); + * + * ### Validate for REST API: + * + * $processor = new WP_CSS_Token_Processor( $css ); + * $result = $processor->validate(); // true or WP_Error + * + * ### Low-level token inspection: + * + * $processor = new WP_CSS_Token_Processor( $css ); + * while ( $processor->next_token() ) { + * if ( WP_CSS_Token_Processor::URL_TOKEN === $processor->get_token_type() ) { + * // inspect or modify + * } + * } + * $output = $processor->get_updated_css(); + * + * @since X.X.0 + */ +class WP_CSS_Token_Processor { + + /** + * Represents a CSS ident-token. + * + * @since X.X.0 + * @var string + */ + const IDENT_TOKEN = 'ident-token'; + + /** + * Represents a CSS function-token (ident followed by `(`). + * + * @since X.X.0 + * @var string + */ + const FUNCTION_TOKEN = 'function-token'; + + /** + * Represents a CSS at-keyword-token (e.g. `@media`). + * + * @since X.X.0 + * @var string + */ + const AT_KEYWORD_TOKEN = 'at-keyword-token'; + + /** + * Represents a CSS hash-token (e.g. `#ff0000`). + * + * @since X.X.0 + * @var string + */ + const HASH_TOKEN = 'hash-token'; + + /** + * Represents a CSS string-token (single- or double-quoted string). + * + * @since X.X.0 + * @var string + */ + const STRING_TOKEN = 'string-token'; + + /** + * Represents a CSS bad-string-token (e.g. an unterminated or newline-broken string). + * + * @since X.X.0 + * @var string + */ + const BAD_STRING_TOKEN = 'bad-string-token'; + + /** + * Represents a CSS url-token (unquoted URL, e.g. `url(foo.png)`). + * + * @since X.X.0 + * @var string + */ + const URL_TOKEN = 'url-token'; + + /** + * Represents a CSS bad-url-token (malformed unquoted URL). + * + * @since X.X.0 + * @var string + */ + const BAD_URL_TOKEN = 'bad-url-token'; + + /** + * Represents a CSS delim-token (a single unrecognised character). 
+ * + * @since X.X.0 + * @var string + */ + const DELIM_TOKEN = 'delim-token'; + + /** + * Represents a CSS number-token (e.g. `42`, `3.14`). + * + * @since X.X.0 + * @var string + */ + const NUMBER_TOKEN = 'number-token'; + + /** + * Represents a CSS percentage-token (e.g. `50%`). + * + * @since X.X.0 + * @var string + */ + const PERCENTAGE_TOKEN = 'percentage-token'; + + /** + * Represents a CSS dimension-token (e.g. `16px`, `1.5rem`). + * + * @since X.X.0 + * @var string + */ + const DIMENSION_TOKEN = 'dimension-token'; + + /** + * Represents a CSS whitespace-token (one or more whitespace characters). + * + * @since X.X.0 + * @var string + */ + const WHITESPACE_TOKEN = 'whitespace-token'; + + /** + * Represents a CSS CDO-token (``). + * + * @since X.X.0 + * @var string + */ + const CDC_TOKEN = 'CDC-token'; + + /** + * Represents a CSS colon-token (`:`). + * + * @since X.X.0 + * @var string + */ + const COLON_TOKEN = 'colon-token'; + + /** + * Represents a CSS semicolon-token (`;`). + * + * @since X.X.0 + * @var string + */ + const SEMICOLON_TOKEN = 'semicolon-token'; + + /** + * Represents a CSS comma-token (`,`). + * + * @since X.X.0 + * @var string + */ + const COMMA_TOKEN = 'comma-token'; + + /** + * Represents a CSS [-token (`[`). + * + * @since X.X.0 + * @var string + */ + const OPEN_SQUARE_TOKEN = '[-token'; + + /** + * Represents a CSS ]-token (`]`). + * + * @since X.X.0 + * @var string + */ + const CLOSE_SQUARE_TOKEN = ']-token'; + + /** + * Represents a CSS (-token (`(`). + * + * @since X.X.0 + * @var string + */ + const OPEN_PAREN_TOKEN = '(-token'; + + /** + * Represents a CSS )-token (`)`). + * + * @since X.X.0 + * @var string + */ + const CLOSE_PAREN_TOKEN = ')-token'; + + /** + * Represents a CSS {-token (`{`). + * + * @since X.X.0 + * @var string + */ + const OPEN_CURLY_TOKEN = '{-token'; + + /** + * Represents a CSS }-token (`}`). 
+ * + * @since X.X.0 + * @var string + */ + const CLOSE_CURLY_TOKEN = '}-token'; + + /** + * Represents a CSS EOF-token (end of input). + * + * @since X.X.0 + * @var string + */ + const EOF_TOKEN = 'EOF-token'; + + /* + * Note: UNICODE_RANGE_TOKEN is intentionally absent. The tokenizer does not + * emit this token type; U+ sequences are treated as unknown tokens (DELIM_TOKEN). + * See the "Known gaps (v1)" section in the class docblock. + */ + + /** + * At-rule keywords permitted in block custom CSS. + * + * At-rules not in this list are stripped by sanitize() and flagged + * by validate(). The check is case-insensitive. Vendor-prefixed variants + * (e.g. -webkit-keyframes) are included as explicit literal entries in + * this list. + * + * @since X.X.0 + * @var string[] + */ + const ALLOWED_AT_RULES = array( + 'media', + 'supports', + 'keyframes', + '-webkit-keyframes', + 'layer', + 'container', + 'font-face', + ); + + /** + * The original CSS input string (null bytes stripped). + * + * @since X.X.0 + * @var string + */ + private $css = ''; + + /** + * Current byte offset within the CSS string. + * + * @since X.X.0 + * @var int + */ + private $at = 0; + + /** + * Byte offset where the current token starts, or null before the first token. + * + * @since X.X.0 + * @var int|null + */ + private $token_start = null; + + /** + * Byte length of the current token. + * + * @since X.X.0 + * @var int + */ + private $token_length = 0; + + /** + * Type of the current token, or null when no token has been consumed yet. + * + * @since X.X.0 + * @var string|null + */ + private $token_type = null; + + /** + * Cached byte length of the CSS string. + * + * @since X.X.0 + * @var int + */ + private $length = 0; + + /** + * Current `{ }` nesting depth. + * + * @since X.X.0 + * @var int + */ + private $block_depth = 0; + + /** + * Pending token replacements to apply on get_updated_css(). 
+ * + * Each entry is an array with keys: + * - 'start' int Byte offset of the token in the original CSS string. + * - 'length' int Byte length of the token in the original CSS string. + * - 'replacement' string Replacement text (empty string to remove the token). + * + * @since X.X.0 + * @var array + */ + private $replacements = array(); + + /** + * Log of tokens removed during the last sanitize() call. + * + * Each entry: [ 'token' => string, 'reason' => string ] + * + * Reset at the start of each sanitize() call. + * + * @since X.X.0 + * @var array + */ + private $removed_tokens = array(); + + /** + * Constructor. + * + * @since X.X.0 + * + * @param string $css The CSS string to process. + */ + public function __construct( string $css ) { + // Strip null bytes before any processing — these have no valid use in CSS + // and are a common vector for bypassing text filters. + $this->css = str_replace( "\0", '', $css ); + $this->length = strlen( $this->css ); + } + + /** + * Advances the tokenizer to the next token. + * + * Returns true when a token was consumed, false at end-of-input. + * + * @since X.X.0 + * + * @return bool True if a token was consumed, false at end of input. + */ + public function next_token(): bool { + if ( $this->at >= $this->length ) { + $this->token_type = self::EOF_TOKEN; + $this->token_start = $this->at; + $this->token_length = 0; + return false; + } + + $this->token_start = $this->at; + $c = $this->css[ $this->at ]; + + // Whitespace. + if ( ' ' === $c || "\t" === $c || "\n" === $c || "\r" === $c || "\f" === $c ) { + $this->at++; + while ( $this->at < $this->length ) { + $nc = $this->css[ $this->at ]; + if ( ' ' !== $nc && "\t" !== $nc && "\n" !== $nc && "\r" !== $nc && "\f" !== $nc ) { + break; + } + $this->at++; + } + $this->token_type = self::WHITESPACE_TOKEN; + $this->token_length = $this->at - $this->token_start; + return true; + } + + // Single-character punctuation. 
+ if ( ':' === $c ) { + $this->at++; + $this->token_type = self::COLON_TOKEN; + $this->token_length = 1; + return true; + } + + if ( ';' === $c ) { + $this->at++; + $this->token_type = self::SEMICOLON_TOKEN; + $this->token_length = 1; + return true; + } + + if ( ',' === $c ) { + $this->at++; + $this->token_type = self::COMMA_TOKEN; + $this->token_length = 1; + return true; + } + + if ( '{' === $c ) { + $this->at++; + $this->block_depth++; + $this->token_type = self::OPEN_CURLY_TOKEN; + $this->token_length = 1; + return true; + } + + if ( '}' === $c ) { + $this->at++; + if ( $this->block_depth > 0 ) { + $this->block_depth--; + } + $this->token_type = self::CLOSE_CURLY_TOKEN; + $this->token_length = 1; + return true; + } + + if ( '(' === $c ) { + $this->at++; + $this->token_type = self::OPEN_PAREN_TOKEN; + $this->token_length = 1; + return true; + } + + if ( ')' === $c ) { + $this->at++; + $this->token_type = self::CLOSE_PAREN_TOKEN; + $this->token_length = 1; + return true; + } + + if ( '[' === $c ) { + $this->at++; + $this->token_type = self::OPEN_SQUARE_TOKEN; + $this->token_length = 1; + return true; + } + + if ( ']' === $c ) { + $this->at++; + $this->token_type = self::CLOSE_SQUARE_TOKEN; + $this->token_length = 1; + return true; + } + + // CDO-token `` — must be checked before general `-` ident-start. + if ( '-' === $c ) { + if ( $this->at + 2 < $this->length && '-->' === substr( $this->css, $this->at, 3 ) ) { + $this->at += 3; + $this->token_type = self::CDC_TOKEN; + $this->token_length = 3; + return true; + } + // Check for ident-start after `-` (custom property `--` or `-` + ident-start char). + if ( $this->is_ident_start( $this->at ) ) { + $this->consume_ident_chars(); + $ident_value = substr( $this->css, $this->token_start, $this->at - $this->token_start ); + // url( special handling. 
+ if ( $this->at < $this->length && '(' === $this->css[ $this->at ] && 'url' === strtolower( $ident_value ) ) { + return $this->consume_url_or_function(); + } + if ( $this->at < $this->length && '(' === $this->css[ $this->at ] ) { + $this->at++; + $this->token_type = self::FUNCTION_TOKEN; + $this->token_length = $this->at - $this->token_start; + return true; + } + $this->token_type = self::IDENT_TOKEN; + $this->token_length = $this->at - $this->token_start; + return true; + } + // Falls through to DELIM_TOKEN. + } + + // at-keyword-token. + if ( '@' === $c ) { + if ( $this->at + 1 < $this->length && $this->is_ident_start( $this->at + 1 ) ) { + $this->at++; // consume `@`. + $this->consume_ident_chars(); + $this->token_type = self::AT_KEYWORD_TOKEN; + $this->token_length = $this->at - $this->token_start; + return true; + } + // Falls through to DELIM_TOKEN. + } + + // hash-token. + if ( '#' === $c ) { + if ( $this->at + 1 < $this->length ) { + $nc = $this->css[ $this->at + 1 ]; + if ( $this->is_ident_char( $nc ) ) { + $this->at++; // consume `#`. + $this->consume_ident_chars(); + $this->token_type = self::HASH_TOKEN; + $this->token_length = $this->at - $this->token_start; + return true; + } + } + // Falls through to DELIM_TOKEN. + } + + // Numeric tokens — number-token, dimension-token, percentage-token. + if ( $this->is_number_start( $this->at ) ) { + $this->consume_number(); + if ( $this->at < $this->length && '%' === $this->css[ $this->at ] ) { + $this->at++; + $this->token_type = self::PERCENTAGE_TOKEN; + $this->token_length = $this->at - $this->token_start; + return true; + } + if ( $this->at < $this->length && $this->is_ident_start( $this->at ) ) { + $this->consume_ident_chars(); + $this->token_type = self::DIMENSION_TOKEN; + $this->token_length = $this->at - $this->token_start; + return true; + } + $this->token_type = self::NUMBER_TOKEN; + $this->token_length = $this->at - $this->token_start; + return true; + } + + // String tokens. 
+ if ( '"' === $c || "'" === $c ) { + $quote = $c; + $this->at++; + $bad = false; + while ( $this->at < $this->length ) { + $sc = $this->css[ $this->at ]; + if ( $sc === $quote ) { + $this->at++; + break; + } + if ( '\\' === $sc ) { + // Skip the next character (escape sequence). + $this->at += 2; + continue; + } + if ( "\n" === $sc || "\r" === $sc || "\f" === $sc ) { + $bad = true; + break; + } + $this->at++; + } + $this->token_type = $bad ? self::BAD_STRING_TOKEN : self::STRING_TOKEN; + $this->token_length = $this->at - $this->token_start; + return true; + } + + // Ident-like tokens (ident-token, function-token). + if ( $this->is_ident_start( $this->at ) ) { + $this->consume_ident_chars(); + $ident_value = substr( $this->css, $this->token_start, $this->at - $this->token_start ); + // url( special handling. + if ( $this->at < $this->length && '(' === $this->css[ $this->at ] && 'url' === strtolower( $ident_value ) ) { + return $this->consume_url_or_function(); + } + if ( $this->at < $this->length && '(' === $this->css[ $this->at ] ) { + $this->at++; + $this->token_type = self::FUNCTION_TOKEN; + $this->token_length = $this->at - $this->token_start; + return true; + } + $this->token_type = self::IDENT_TOKEN; + $this->token_length = $this->at - $this->token_start; + return true; + } + + // Fallback: DELIM_TOKEN — consume one byte. + $this->at++; + $this->token_type = self::DELIM_TOKEN; + $this->token_length = 1; + return true; + } + + /** + * Returns the type of the current token. + * + * Returns null before the first call to next_token(). + * + * @since X.X.0 + * + * @return string|null Token type constant, or null if no token has been consumed. + */ + public function get_token_type(): ?string { + return $this->token_type; + } + + /** + * Returns the raw CSS text of the current token. + * + * Returns null before the first call to next_token(). + * + * @since X.X.0 + * + * @return string|null Raw token text, or null if no token has been consumed. 
+ */ + public function get_token_value(): ?string { + if ( null === $this->token_start ) { + return null; + } + return substr( $this->css, $this->token_start, $this->token_length ); + } + + /** + * Returns the current `{ }` nesting depth. + * + * The depth is incremented when an OPEN_CURLY_TOKEN is consumed and + * decremented (never below 0) when a CLOSE_CURLY_TOKEN is consumed. + * + * @since X.X.0 + * + * @return int Current block nesting depth. + */ + public function get_block_depth(): int { + return $this->block_depth; + } + + /** + * Removes the current token from the CSS output. + * + * Records a removal that will be applied when get_updated_css() is called. + * Has no effect and returns false if next_token() has not been called yet, + * or if next_token() has exhausted the input (returned false). + * + * @since X.X.0 + * + * @return bool Whether the removal was recorded. + */ + public function remove_token(): bool { + if ( null === $this->token_start || self::EOF_TOKEN === $this->token_type ) { + return false; + } + $this->replacements[] = array( + 'start' => $this->token_start, + 'length' => $this->token_length, + 'replacement' => '', + ); + return true; + } + + /** + * Replaces the current token's raw text in the CSS output. + * + * Records a replacement that will be applied when get_updated_css() is called. + * Has no effect and returns false if next_token() has not been called yet, + * or if next_token() has exhausted the input (returned false). + * + * Note: The replacement text is used verbatim — no escaping or validation + * is applied. Callers are responsible for providing safe replacement values. + * + * @since X.X.0 + * + * @param string $value Replacement text. + * @return bool Whether the replacement was recorded. 
+ */ + public function set_token_value( string $value ): bool { + if ( null === $this->token_start || self::EOF_TOKEN === $this->token_type ) { + return false; + } + $this->replacements[] = array( + 'start' => $this->token_start, + 'length' => $this->token_length, + 'replacement' => $value, + ); + return true; + } + + /** + * Returns the CSS string with all recorded modifications applied. + * + * Modifications recorded via remove_token() and set_token_value() are applied + * to the original input string in reverse byte order, so that earlier byte + * offsets remain valid as later replacements are made first. + * + * If no modifications have been recorded, returns the original CSS string + * (after null-byte stripping applied in the constructor). + * + * @since X.X.0 + * + * @return string The modified CSS string. + */ + public function get_updated_css(): string { + if ( empty( $this->replacements ) ) { + return $this->css; + } + + // Deduplicate by start offset — keep the last-recorded replacement for + // any given position (last-write-wins semantics). + $keyed = array(); + foreach ( $this->replacements as $replacement ) { + $keyed[ $replacement['start'] ] = $replacement; + } + $sorted = array_values( $keyed ); + + // Sort replacements by start offset descending so we apply from end to + // start, keeping earlier byte offsets valid as we make changes. + usort( + $sorted, + static function ( $a, $b ) { + return $b['start'] - $a['start']; + } + ); + + $output = $this->css; + foreach ( $sorted as $replacement ) { + $output = substr_replace( + $output, + $replacement['replacement'], + $replacement['start'], + $replacement['length'] + ); + } + return $output; + } + + /** + * Returns the list of tokens removed during the last sanitize() call. + * + * Each entry contains: + * - 'token' string The raw token text that was removed. + * - 'reason' string A short code describing why it was removed. 
+ * + * Returns an empty array if sanitize() has not been called, or if + * the last sanitize() call removed nothing. + * + * @since X.X.0 + * + * @return array Array of removal log entries. + */ + public function get_removed_tokens(): array { + return $this->removed_tokens; + } + + /** + * Validates the CSS string against all safety checks without modifying it. + * + * Returns `true` when the CSS passes every check. When a violation is found, + * returns a `WP_Error` on the **first** violation encountered — subsequent + * tokens are not inspected. + * + * **Guarantee:** If `validate()` returns `true`, then calling `sanitize()` on + * the same input will return that input unchanged (i.e. `sanitize()` is a + * no-op). This makes `validate()` suitable for REST API schema validation where + * you want to reject bad input rather than silently strip it. + * + * **Null bytes:** The constructor strips null bytes before any processing, so a + * `css_null_byte` violation can never be triggered on a normally-constructed + * `WP_CSS_Token_Processor` instance. No `css_null_byte` check is implemented; + * callers that need to detect raw null bytes in the original input must check + * the string before constructing the processor. + * + * **Known gap:** `url("javascript:...")` with a quoted string argument is not + * flagged as unsafe — it tokenizes as FUNCTION_TOKEN + STRING_TOKEN, not as + * URL_TOKEN, so the URL protocol check does not fire. See the sanitize() + * docblock for the full explanation. This is not a practical security concern + * but means validate() does not reject quoted javascript: in url(). 
+ * + * **Error codes:** + * + * | Code | Condition | + * |---------------------------|--------------------------------------------------------| + * | `css_injection` | ``) | + * | `css_malformed_token` | `BAD_STRING_TOKEN` or `BAD_URL_TOKEN` | + * | `css_unsafe_url` | `URL_TOKEN` with `javascript:` or `data:` scheme, or a scheme not in `wp_allowed_protocols()` | + * | `css_disallowed_at_rule` | AT_KEYWORD_TOKEN whose keyword is not in `ALLOWED_AT_RULES` | + * + * Example usage: + * + * $processor = new WP_CSS_Token_Processor( $css ); + * $result = $processor->validate(); + * if ( is_wp_error( $result ) ) { + * // handle $result->get_error_code() ... + * } + * + * @since X.X.0 + * + * @return true|WP_Error True if the CSS passes all checks; WP_Error on the first violation. + */ + public function validate() { + // Injection guard — if element. + if ( false !== stripos( $this->css, 'reset(); + + $allowed_protocols = wp_allowed_protocols(); + + while ( $this->next_token() ) { + $type = $this->get_token_type(); + $value = $this->get_token_value(); + + // HTML comment tokens have no valid use in CSS. + if ( self::CDO_TOKEN === $type || self::CDC_TOKEN === $type ) { + return new WP_Error( + 'css_html_comment', + __( 'CSS contains an HTML comment token.' ) + ); + } + + // Malformed tokens — bad-string and bad-url have no recoverable content. + if ( self::BAD_STRING_TOKEN === $type || self::BAD_URL_TOKEN === $type ) { + return new WP_Error( + 'css_malformed_token', + __( 'CSS contains a malformed token.' ) + ); + } + + // URL protocol filtering. + if ( self::URL_TOKEN === $type ) { + // URL_TOKEN always contains an unquoted URL (quoted URLs become FUNCTION_TOKEN + // in consume_url_or_function()). The optional quote groups are included + // defensively, in case the token value is ever constructed differently. 
+ $url = (string) preg_replace( '/^url\(\s*["\']?|["\']?\s*\)$/i', '', $value ); + $url = trim( $url ); + $scheme = strtolower( (string) parse_url( $url, PHP_URL_SCHEME ) ); + + if ( 'javascript' === $scheme || 'data' === $scheme ) { + return new WP_Error( + 'css_unsafe_url', + __( 'CSS contains a URL with an unsafe scheme.' ) + ); + } + + if ( '' !== $scheme && ! in_array( $scheme, $allowed_protocols, true ) ) { + return new WP_Error( + 'css_unsafe_url', + __( 'CSS contains a URL with a disallowed scheme.' ) + ); + } + } + + // At-rule allowlist enforcement. + if ( self::AT_KEYWORD_TOKEN === $type ) { + // Strip '@' and normalise to lowercase for comparison. + $keyword = strtolower( ltrim( $value, '@' ) ); + + if ( ! in_array( $keyword, self::ALLOWED_AT_RULES, true ) ) { + return new WP_Error( + 'css_disallowed_at_rule', + __( 'CSS contains a disallowed at-rule.' ) + ); + } + } + } + + return true; + } + + /** + * Sanitizes the CSS string, stripping unsafe tokens and rules. + * + * Security policy applied: + * + * - Returns '' immediately if ` element, so ``) tokens. HTML comments have no + * valid use in CSS and suggest an attempt to embed CSS inside HTML. + * + * - Strips `bad-string-token` and `bad-url-token`. These represent + * malformed CSS constructs that should not be preserved. + * + * - Strips `url()` tokens where the URL has a `javascript:` or `data:` + * scheme — these are always unsafe in a CSS context. Other URL tokens + * whose scheme is not in wp_allowed_protocols() have their URL value + * replaced with '' (preserving the `url()` wrapper) to avoid breaking + * the surrounding declaration structure while removing the unsafe URL. + * + * Known gap: `url("javascript:...")` with a quoted string argument tokenizes + * as FUNCTION_TOKEN `url(` followed by a STRING_TOKEN, not as a URL_TOKEN. + * The string is preserved as-is. 
This is not a practical security concern + * because browsers do not execute javascript: in CSS resource-fetch contexts, + * but it means sanitize() does not strip quoted javascript: in url(). + * + * - Strips entire at-rules whose keyword is not in ALLOWED_AT_RULES, + * including their following block or semicolon terminator. Unknown + * at-rules are stripped rather than passed through (safety-first). + * See ALLOWED_AT_RULES for the permitted list. + * + * Strip granularity: a bad token removes that token; a bad at-rule removes + * the entire rule. The rest of the CSS is preserved. + * + * Idempotency guarantee: sanitize( sanitize( $css ) ) === sanitize( $css ). + * + * @since X.X.0 + * + * @return string The sanitized CSS string. + */ + public function sanitize(): string { + // Injection guard — if css, 'removed_tokens = array(); + $this->reset(); + + $allowed_protocols = wp_allowed_protocols(); + + while ( $this->next_token() ) { + $type = $this->get_token_type(); + $value = $this->get_token_value(); + + // Strip HTML comment tokens — these have no valid use in CSS. + if ( self::CDO_TOKEN === $type || self::CDC_TOKEN === $type ) { + $this->removed_tokens[] = array( + 'token' => $value, + 'reason' => 'html_comment', + ); + $this->remove_token(); + continue; + } + + // Strip malformed tokens — bad-string and bad-url have no recoverable content. + if ( self::BAD_STRING_TOKEN === $type || self::BAD_URL_TOKEN === $type ) { + $this->removed_tokens[] = array( + 'token' => $value, + 'reason' => self::BAD_STRING_TOKEN === $type ? 'bad_string' : 'bad_url', + ); + $this->remove_token(); + continue; + } + + // URL protocol filtering. + if ( self::URL_TOKEN === $type ) { + // URL_TOKEN always contains an unquoted URL (quoted URLs become FUNCTION_TOKEN + // in consume_url_or_function()). The optional quote groups are included + // defensively, in case the token value is ever constructed differently. 
+ $url = (string) preg_replace( '/^url\(\s*["\']?|["\']?\s*\)$/i', '', $value ); + $url = trim( $url ); + $scheme = strtolower( (string) parse_url( $url, PHP_URL_SCHEME ) ); + + if ( 'javascript' === $scheme || 'data' === $scheme ) { + // Always strip javascript: and data: entirely — no legitimate use in CSS. + $this->removed_tokens[] = array( + 'token' => $value, + 'reason' => 'unsafe_url_protocol', + ); + $this->remove_token(); + continue; + } + + if ( '' !== $scheme && ! in_array( $scheme, $allowed_protocols, true ) ) { + // Disallowed scheme — replace the URL value with '' to preserve + // the surrounding declaration structure (e.g. background: url();). + $this->removed_tokens[] = array( + 'token' => $value, + 'reason' => 'disallowed_url_protocol', + ); + $this->set_token_value( 'url()' ); + continue; + } + } + + // At-rule allowlist enforcement. + if ( self::AT_KEYWORD_TOKEN === $type ) { + // Strip '@' and normalise to lowercase for comparison. + $keyword = strtolower( ltrim( $value, '@' ) ); + + if ( ! in_array( $keyword, self::ALLOWED_AT_RULES, true ) ) { + $this->removed_tokens[] = array( + 'token' => $value, + 'reason' => 'disallowed_at_rule', + ); + $this->remove_token(); + // Consume and remove the rule's block or statement terminator. + $this->consume_and_remove_rule_block(); + } + } + } + + return $this->get_updated_css(); + } + + /** + * Resets the processor cursor to the beginning of the input. + * + * Called at the start of sanitize() and validate() to allow those + * methods to be called on a freshly constructed instance without + * requiring the caller to have iterated the tokenizer first. + * + * Clears all recorded replacements and resets the block depth counter. + * Does NOT clear $removed_tokens — that log is reset at the start of + * sanitize() so callers can read it after sanitize() returns. 
+ * + * @since X.X.0 + */ + private function reset(): void { + $this->at = 0; + $this->token_start = null; + $this->token_length = 0; + $this->token_type = null; + $this->block_depth = 0; + $this->replacements = array(); + } + + /** + * Consumes and removes the block or semicolon tail of a disallowed at-rule. + * + * Called immediately after calling remove_token() on a disallowed at-keyword. + * Advances the cursor and calls remove_token() on every token until the at-rule + * ends — either at a top-level ';' (statement at-rules like @import) or after + * the closing '}' of a balanced block at-rule like @media. + * + * @since X.X.0 + */ + private function consume_and_remove_rule_block(): void { + // Use a local depth counter rather than $this->block_depth because we want to + // track nesting relative to the at-rule being consumed, not the global document + // depth. $this->block_depth is updated by next_token() for all tokens consumed + // here and will be correct on return — the two counters remain consistent. + $depth = 0; + while ( $this->next_token() ) { + $type = $this->get_token_type(); + $this->remove_token(); + + if ( self::OPEN_CURLY_TOKEN === $type ) { + ++$depth; + } elseif ( self::CLOSE_CURLY_TOKEN === $type ) { + --$depth; + if ( $depth <= 0 ) { + break; + } + } elseif ( self::SEMICOLON_TOKEN === $type && 0 === $depth ) { + break; + } + } + } + + /** + * Determines whether the byte at the given offset is an ident-start character. + * + * An ident-start character is one of: a–z, A–Z, `_`, any byte with value + * greater than 127 (non-ASCII), or `-` followed by another ident-start char + * or another `-` (for custom properties `--`). + * + * @since X.X.0 + * + * @param int $offset Byte offset into the CSS string. + * @return bool True if the byte at $offset begins an identifier. 
+	 */
+	private function is_ident_start( int $offset ): bool {
+		if ( $offset >= $this->length ) {
+			return false;
+		}
+
+		$c = $this->css[ $offset ];
+		$o = ord( $c );
+
+		// a–z, A–Z, underscore, or any non-ASCII byte starts an ident directly.
+		if ( ( $o >= 65 && $o <= 90 ) || ( $o >= 97 && $o <= 122 ) || 95 === $o || $o > 127 ) {
+			return true;
+		}
+
+		// Otherwise only `-` can start an ident, and only when another byte follows it.
+		if ( '-' !== $c || $offset + 1 >= $this->length ) {
+			return false;
+		}
+
+		$nc = $this->css[ $offset + 1 ];
+		$no = ord( $nc );
+
+		// `--` (custom property) or `-` followed by an ident-start character.
+		return '-' === $nc
+			|| ( $no >= 65 && $no <= 90 )
+			|| ( $no >= 97 && $no <= 122 )
+			|| 95 === $no
+			|| $no > 127;
+	}
+
+	/**
+	 * Determines whether a single character is a valid ident body character.
+	 *
+	 * Ident body characters are: a–z, A–Z, 0–9, `-`, `_`, or non-ASCII bytes.
+	 *
+	 * @since X.X.0
+	 *
+	 * @param string $c A single byte/character. Only the first byte is inspected.
+	 * @return bool True if $c is a valid ident body character.
+	 */
+	private function is_ident_char( string $c ): bool {
+		$o = ord( $c );
+		return ( $o >= 97 && $o <= 122 ) // a–z
+			|| ( $o >= 65 && $o <= 90 )  // A–Z
+			|| ( $o >= 48 && $o <= 57 )  // 0–9
+			|| 45 === $o                 // `-`
+			|| 95 === $o                 // `_`
+			|| $o > 127;                 // non-ASCII
+	}
+
+	/**
+	 * Advances $this->at past all ident body characters starting at $this->at.
+	 *
+	 * Stops at the first byte for which is_ident_char() is false, or at the end
+	 * of the input.
+	 *
+	 * @since X.X.0
+	 *
+	 * @return void
+	 */
+	private function consume_ident_chars(): void {
+		// Note: CSS escape sequences (\XX hex or \<char>) in identifiers are not
+		// supported in v1. A backslash is emitted as DELIM_TOKEN. See "Known gaps"
+		// in the class docblock.
+		while ( $this->at < $this->length && $this->is_ident_char( $this->css[ $this->at ] ) ) {
+			$this->at++;
+		}
+	}
+
+	/**
+	 * Determines whether the byte sequence starting at $offset looks like the
+	 * start of a CSS number.
+	 *
+	 * A number start is: a digit, or `.` followed by a digit, either of them
+	 * optionally preceded by a single `+` or `-`.
+	 *
+	 * @since X.X.0
+	 *
+	 * @param int $offset Byte offset into the CSS string.
+	 * @return bool True if a number starts at $offset.
+	 */
+	private function is_number_start( int $offset ): bool {
+		if ( $offset >= $this->length ) {
+			return false;
+		}
+
+		// An optional sign is skipped; what follows must itself start a number.
+		if ( '+' === $this->css[ $offset ] || '-' === $this->css[ $offset ] ) {
+			$offset++;
+			if ( $offset >= $this->length ) {
+				return false;
+			}
+		}
+
+		// Digit.
+		if ( 1 === strspn( $this->css, '0123456789', $offset, 1 ) ) {
+			return true;
+		}
+
+		// `.digit`.
+		return '.' === $this->css[ $offset ]
+			&& $offset + 1 < $this->length
+			&& 1 === strspn( $this->css, '0123456789', $offset + 1, 1 );
+	}
+
+	/**
+	 * Advances $this->at past a complete CSS number: optional sign, integer part,
+	 * optional fractional part, and optional exponent.
+	 *
+	 * A `.` is only consumed when a digit follows it, and an `e`/`E` is only
+	 * consumed when it is followed by (an optional sign and) at least one digit.
+	 * Otherwise those bytes are left for the next token, so `1.foo` yields the
+	 * number `1` and `1em` yields the number `1` followed by the unit `em`.
+	 *
+	 * @since X.X.0
+	 *
+	 * @return void
+	 */
+	private function consume_number(): void {
+		$digits = '0123456789';
+
+		// Optional sign.
+		if ( $this->at < $this->length && ( '+' === $this->css[ $this->at ] || '-' === $this->css[ $this->at ] ) ) {
+			$this->at++;
+		}
+
+		// Integer part. Every scan is bounded by $this->length, like the rest of the tokenizer.
+		$this->at += strspn( $this->css, $digits, $this->at, $this->length - $this->at );
+
+		// Optional fractional part: the `.` belongs to the number only if a digit follows.
+		if (
+			$this->at + 1 < $this->length
+			&& '.' === $this->css[ $this->at ]
+			&& 1 === strspn( $this->css, $digits, $this->at + 1, 1 )
+		) {
+			$this->at++;
+			$this->at += strspn( $this->css, $digits, $this->at, $this->length - $this->at );
+		}
+
+		// Optional exponent (e/E followed by optional sign and digits).
+		if ( $this->at < $this->length && ( 'e' === $this->css[ $this->at ] || 'E' === $this->css[ $this->at ] ) ) {
+			$exponent_at = $this->at + 1;
+			if ( $exponent_at < $this->length && ( '+' === $this->css[ $exponent_at ] || '-' === $this->css[ $exponent_at ] ) ) {
+				$exponent_at++;
+			}
+
+			$exponent_digits = $exponent_at < $this->length
+				? strspn( $this->css, $digits, $exponent_at, $this->length - $exponent_at )
+				: 0;
+
+			// Commit only when at least one exponent digit exists; otherwise `e` starts a unit.
+			if ( $exponent_digits > 0 ) {
+				$this->at = $exponent_at + $exponent_digits;
+			}
+		}
+	}
+
+	/**
+	 * Consumes a `url(…)` sequence after the ident `url` has been consumed and `(`
+	 * is the current character.
+	 *
+	 * If the first non-whitespace character inside the parentheses is `"` or `'`,
+	 * this emits a FUNCTION_TOKEN spanning only `url(` so the caller can later
+	 * encounter a STRING_TOKEN inside it. Otherwise the unquoted URL body is
+	 * consumed, emitting URL_TOKEN on success or BAD_URL_TOKEN on failure.
+	 *
+	 * The unquoted URL is bad when it contains a quote, `(`, a backslash, or
+	 * whitespace that is not immediately followed by the closing `)`. Escape
+	 * sequences are not decoded by this tokenizer, so a URL containing a backslash
+	 * is never reported as well-formed: its raw text could differ from the URL a
+	 * browser would resolve.
+	 *
+	 * @since X.X.0
+	 *
+	 * @return bool Always true (a token was consumed).
+	 */
+	private function consume_url_or_function(): bool {
+		$whitespace = " \t\n\r\f";
+
+		// Consume the `(`.
+		$this->at++;
+
+		// Peek past optional whitespace.
+		$peek = $this->at;
+		if ( $peek < $this->length ) {
+			$peek += strspn( $this->css, $whitespace, $peek, $this->length - $peek );
+		}
+
+		// If the next non-whitespace char is a quote, emit FUNCTION_TOKEN.
+		if ( $peek < $this->length && ( '"' === $this->css[ $peek ] || "'" === $this->css[ $peek ] ) ) {
+			$this->token_type = self::FUNCTION_TOKEN;
+			// Use $this->at (not the peek offset) so the FUNCTION_TOKEN spans only 'url(' —
+			// whitespace between 'url(' and the quote is not part of this token and is left
+			// in the input for the following next_token() call.
+			$this->token_length = $this->at - $this->token_start;
+			return true;
+		}
+
+		// Consume optional leading whitespace inside the url().
+		$this->at = $peek;
+
+		// Consume unquoted URL characters.
+		$bad = false;
+		while ( $this->at < $this->length ) {
+			$uc = $this->css[ $this->at ];
+
+			if ( ')' === $uc ) {
+				$this->at++;
+				break;
+			}
+
+			if ( false !== strpos( $whitespace, $uc ) ) {
+				// Whitespace mid-URL: consume the whole run, then expect `)`.
+				$this->at += strspn( $this->css, $whitespace, $this->at, $this->length - $this->at );
+				if ( $this->at < $this->length && ')' === $this->css[ $this->at ] ) {
+					$this->at++;
+				} else {
+					// Anything else, including end of input, makes the URL bad.
+					$bad = true;
+				}
+				break;
+			}
+
+			if ( '"' === $uc || "'" === $uc || '(' === $uc || '\\' === $uc ) {
+				$bad = true;
+				break;
+			}
+
+			$this->at++;
+		}
+
+		if ( $bad ) {
+			// Bad-URL recovery: consume through the closing `)` or to the end of input.
+			// The byte after a backslash is skipped so an escaped `\)` does not end the token.
+			while ( $this->at < $this->length ) {
+				$rc = $this->css[ $this->at ];
+				$this->at++;
+				if ( ')' === $rc ) {
+					break;
+				}
+				if ( '\\' === $rc && $this->at < $this->length ) {
+					$this->at++;
+				}
+			}
+		}
+
+		$this->token_type   = $bad ? self::BAD_URL_TOKEN : self::URL_TOKEN;
+		$this->token_length = $this->at - $this->token_start;
+		return true;
+	}
+}
diff --git a/src/wp-settings.php b/src/wp-settings.php index 023cdccd5ecc9..3ddcb60d89ddf 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -276,6 +276,7 @@ require ABSPATH . WPINC . '/html-api/class-wp-html-stack-event.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php'; require ABSPATH . WPINC . 
'/html-api/class-wp-html-processor.php'; +require ABSPATH . WPINC . '/css-api/class-wp-css-token-processor.php'; require ABSPATH . WPINC . '/class-wp-block-processor.php'; require ABSPATH . WPINC . '/class-wp-http.php'; require ABSPATH . WPINC . '/class-wp-http-streams.php'; diff --git a/tests/phpunit/tests/css-api/WpCssTokenProcessorTest.php b/tests/phpunit/tests/css-api/WpCssTokenProcessorTest.php new file mode 100644 index 0000000000000..49349c551f364 --- /dev/null +++ b/tests/phpunit/tests/css-api/WpCssTokenProcessorTest.php @@ -0,0 +1,1123 @@ +assertFalse( $p->next_token() ); + } + + /** + * Tests that a whitespace-only input produces a single WHITESPACE_TOKEN + * followed by end-of-input. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_whitespace_token() { + $p = new WP_CSS_Token_Processor( ' ' ); + $this->assertTrue( $p->next_token() ); + $this->assertSame( WP_CSS_Token_Processor::WHITESPACE_TOKEN, $p->get_token_type() ); + $this->assertSame( ' ', $p->get_token_value() ); + $this->assertFalse( $p->next_token() ); + } + + /** + * Tests that mixed whitespace characters (space, tab, newline) are collapsed + * into a single WHITESPACE_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_whitespace_token_mixed_chars() { + $p = new WP_CSS_Token_Processor( " \t\n\r\f" ); + $this->assertTrue( $p->next_token() ); + $this->assertSame( WP_CSS_Token_Processor::WHITESPACE_TOKEN, $p->get_token_type() ); + $this->assertSame( " \t\n\r\f", $p->get_token_value() ); + $this->assertFalse( $p->next_token() ); + } + + // ------------------------------------------------------------------------- + // Group B: Single-character punctuation + // ------------------------------------------------------------------------- + + /** + * Tests that `:` produces a COLON_TOKEN. 
+ * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_colon_token() { + $p = new WP_CSS_Token_Processor( ':' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::COLON_TOKEN, $p->get_token_type() ); + $this->assertSame( ':', $p->get_token_value() ); + } + + /** + * Tests that `;` produces a SEMICOLON_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_semicolon_token() { + $p = new WP_CSS_Token_Processor( ';' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::SEMICOLON_TOKEN, $p->get_token_type() ); + $this->assertSame( ';', $p->get_token_value() ); + } + + /** + * Tests that `,` produces a COMMA_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_comma_token() { + $p = new WP_CSS_Token_Processor( ',' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::COMMA_TOKEN, $p->get_token_type() ); + $this->assertSame( ',', $p->get_token_value() ); + } + + /** + * Tests that `{` produces an OPEN_CURLY_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_open_curly_token() { + $p = new WP_CSS_Token_Processor( '{' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::OPEN_CURLY_TOKEN, $p->get_token_type() ); + $this->assertSame( '{', $p->get_token_value() ); + } + + /** + * Tests that `}` produces a CLOSE_CURLY_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_close_curly_token() { + $p = new WP_CSS_Token_Processor( '}' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::CLOSE_CURLY_TOKEN, $p->get_token_type() ); + $this->assertSame( '}', $p->get_token_value() ); + } + + /** + * Tests that `(` produces an OPEN_PAREN_TOKEN. 
+ * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_open_paren_token() { + $p = new WP_CSS_Token_Processor( '(' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::OPEN_PAREN_TOKEN, $p->get_token_type() ); + $this->assertSame( '(', $p->get_token_value() ); + } + + /** + * Tests that `)` produces a CLOSE_PAREN_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_close_paren_token() { + $p = new WP_CSS_Token_Processor( ')' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::CLOSE_PAREN_TOKEN, $p->get_token_type() ); + $this->assertSame( ')', $p->get_token_value() ); + } + + /** + * Tests that `[` produces an OPEN_SQUARE_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_open_square_token() { + $p = new WP_CSS_Token_Processor( '[' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::OPEN_SQUARE_TOKEN, $p->get_token_type() ); + $this->assertSame( '[', $p->get_token_value() ); + } + + /** + * Tests that `]` produces a CLOSE_SQUARE_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_close_square_token() { + $p = new WP_CSS_Token_Processor( ']' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::CLOSE_SQUARE_TOKEN, $p->get_token_type() ); + $this->assertSame( ']', $p->get_token_value() ); + } + + // ------------------------------------------------------------------------- + // Group C: ident-token and function-token + // ------------------------------------------------------------------------- + + /** + * Tests that a simple property name produces an IDENT_TOKEN. 
+ * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_ident_token_simple() { + $p = new WP_CSS_Token_Processor( 'color' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::IDENT_TOKEN, $p->get_token_type() ); + $this->assertSame( 'color', $p->get_token_value() ); + } + + /** + * Tests that an ident with an internal hyphen is tokenized as a single IDENT_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_ident_token_with_hyphen() { + $p = new WP_CSS_Token_Processor( 'background-color' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::IDENT_TOKEN, $p->get_token_type() ); + $this->assertSame( 'background-color', $p->get_token_value() ); + } + + /** + * Tests that a CSS custom property name starting with `--` produces an IDENT_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_ident_token_custom_property() { + $p = new WP_CSS_Token_Processor( '--my-var' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::IDENT_TOKEN, $p->get_token_type() ); + $this->assertSame( '--my-var', $p->get_token_value() ); + } + + /** + * Tests that an ident followed immediately by `(` produces a FUNCTION_TOKEN + * whose value includes the opening parenthesis. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_function_token() { + $p = new WP_CSS_Token_Processor( 'calc(' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::FUNCTION_TOKEN, $p->get_token_type() ); + $this->assertSame( 'calc(', $p->get_token_value() ); + } + + /** + * Tests that an upper-case ident is tokenized correctly. 
+ * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_ident_token_uppercase() { + $p = new WP_CSS_Token_Processor( 'COLOR' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::IDENT_TOKEN, $p->get_token_type() ); + $this->assertSame( 'COLOR', $p->get_token_value() ); + } + + /** + * Tests that an ident starting with an underscore is tokenized correctly. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_ident_token_underscore_start() { + $p = new WP_CSS_Token_Processor( '_private' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::IDENT_TOKEN, $p->get_token_type() ); + $this->assertSame( '_private', $p->get_token_value() ); + } + + // ------------------------------------------------------------------------- + // Group D: at-keyword-token + // ------------------------------------------------------------------------- + + /** + * Tests that `@media` produces an AT_KEYWORD_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_at_keyword_token_media() { + $p = new WP_CSS_Token_Processor( '@media' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::AT_KEYWORD_TOKEN, $p->get_token_type() ); + $this->assertSame( '@media', $p->get_token_value() ); + } + + /** + * Tests that `@import` produces an AT_KEYWORD_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_at_keyword_token_import() { + $p = new WP_CSS_Token_Processor( '@import' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::AT_KEYWORD_TOKEN, $p->get_token_type() ); + $this->assertSame( '@import', $p->get_token_value() ); + } + + /** + * Tests that `@keyframes` produces an AT_KEYWORD_TOKEN. 
+ * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_at_keyword_token_keyframes() { + $p = new WP_CSS_Token_Processor( '@keyframes' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::AT_KEYWORD_TOKEN, $p->get_token_type() ); + $this->assertSame( '@keyframes', $p->get_token_value() ); + } + + /** + * Tests that a lone `@` with no following ident produces a DELIM_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_at_sign_alone_is_delim() { + $p = new WP_CSS_Token_Processor( '@' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::DELIM_TOKEN, $p->get_token_type() ); + } + + // ------------------------------------------------------------------------- + // Group E: hash-token + // ------------------------------------------------------------------------- + + /** + * Tests that a hex color value produces a HASH_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_hash_token_color() { + $p = new WP_CSS_Token_Processor( '#ff0000' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::HASH_TOKEN, $p->get_token_type() ); + $this->assertSame( '#ff0000', $p->get_token_value() ); + } + + /** + * Tests that a short hex color produces a HASH_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_hash_token_short_color() { + $p = new WP_CSS_Token_Processor( '#abc' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::HASH_TOKEN, $p->get_token_type() ); + $this->assertSame( '#abc', $p->get_token_value() ); + } + + /** + * Tests that a lone `#` with no following ident character produces a DELIM_TOKEN. 
+ * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_hash_alone_is_delim() { + $p = new WP_CSS_Token_Processor( '# ' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::DELIM_TOKEN, $p->get_token_type() ); + } + + // ------------------------------------------------------------------------- + // Group F: numeric tokens + // ------------------------------------------------------------------------- + + /** + * Tests that a plain integer produces a NUMBER_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_number_token_integer() { + $p = new WP_CSS_Token_Processor( '42' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::NUMBER_TOKEN, $p->get_token_type() ); + $this->assertSame( '42', $p->get_token_value() ); + } + + /** + * Tests that a number followed by a unit produces a DIMENSION_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_dimension_token() { + $p = new WP_CSS_Token_Processor( '16px' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::DIMENSION_TOKEN, $p->get_token_type() ); + $this->assertSame( '16px', $p->get_token_value() ); + } + + /** + * Tests that a number followed by `%` produces a PERCENTAGE_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_percentage_token() { + $p = new WP_CSS_Token_Processor( '50%' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::PERCENTAGE_TOKEN, $p->get_token_type() ); + $this->assertSame( '50%', $p->get_token_value() ); + } + + /** + * Tests that a decimal number followed by a unit produces a DIMENSION_TOKEN. 
+ * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_dimension_token_rem() { + $p = new WP_CSS_Token_Processor( '1.5rem' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::DIMENSION_TOKEN, $p->get_token_type() ); + $this->assertSame( '1.5rem', $p->get_token_value() ); + } + + /** + * Tests that a decimal number with no unit produces a NUMBER_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_number_token_decimal() { + $p = new WP_CSS_Token_Processor( '3.14' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::NUMBER_TOKEN, $p->get_token_type() ); + $this->assertSame( '3.14', $p->get_token_value() ); + } + + /** + * Tests that a number starting with `.` (no leading digit) produces a NUMBER_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_number_token_leading_dot() { + $p = new WP_CSS_Token_Processor( '.5' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::NUMBER_TOKEN, $p->get_token_type() ); + $this->assertSame( '.5', $p->get_token_value() ); + } + + // ------------------------------------------------------------------------- + // Group G: string-token and bad-string-token + // ------------------------------------------------------------------------- + + /** + * Tests that a double-quoted string produces a STRING_TOKEN. 
+ * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_string_token_double_quoted() { + $p = new WP_CSS_Token_Processor( '"hello world"' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::STRING_TOKEN, $p->get_token_type() ); + $this->assertSame( '"hello world"', $p->get_token_value() ); + } + + /** + * Tests that a single-quoted string produces a STRING_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_string_token_single_quoted() { + $p = new WP_CSS_Token_Processor( "'hello'" ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::STRING_TOKEN, $p->get_token_type() ); + } + + /** + * Tests that a string containing an unescaped newline produces a BAD_STRING_TOKEN. + * + * A newline inside a string without a backslash escape is a bad-string-token + * per the CSS Syntax Level 3 specification. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_bad_string_token_unterminated() { + $p = new WP_CSS_Token_Processor( "\"hello\nworld\"" ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::BAD_STRING_TOKEN, $p->get_token_type() ); + } + + /** + * Tests that a string with an escaped newline (`\`) is valid (STRING_TOKEN). + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_string_token_escaped_newline_is_valid() { + $p = new WP_CSS_Token_Processor( "\"hello\\\nworld\"" ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::STRING_TOKEN, $p->get_token_type() ); + } + + // ------------------------------------------------------------------------- + // Group H: url-token and bad-url-token + // ------------------------------------------------------------------------- + + /** + * Tests that an unquoted url() produces a URL_TOKEN. 
+ * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_url_token_unquoted() { + $p = new WP_CSS_Token_Processor( 'url(foo.png)' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::URL_TOKEN, $p->get_token_type() ); + $this->assertSame( 'url(foo.png)', $p->get_token_value() ); + } + + /** + * Tests that a quoted url() produces a FUNCTION_TOKEN (the string token is + * consumed separately by the caller). + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_url_token_with_quotes_is_function() { + $p = new WP_CSS_Token_Processor( 'url("foo.png")' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::FUNCTION_TOKEN, $p->get_token_type() ); + } + + /** + * Tests that an unquoted url() containing a space in the URL body produces + * a BAD_URL_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_bad_url_token() { + $p = new WP_CSS_Token_Processor( 'url(bad url)' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::BAD_URL_TOKEN, $p->get_token_type() ); + } + + /** + * Tests that `URL(` (uppercase) with an unquoted URL produces a URL_TOKEN. + * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + */ + public function test_url_token_uppercase() { + $p = new WP_CSS_Token_Processor( 'URL(foo.png)' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::URL_TOKEN, $p->get_token_type() ); + } + + // ------------------------------------------------------------------------- + // Group I: CDO-token and CDC-token + // ------------------------------------------------------------------------- + + /** + * Tests that `` produces a CDC_TOKEN. 
+ * + * @since X.X.0 + * + * @covers ::next_token + * @covers ::get_token_type + * @covers ::get_token_value + */ + public function test_cdc_token() { + $p = new WP_CSS_Token_Processor( '-->' ); + $p->next_token(); + $this->assertSame( WP_CSS_Token_Processor::CDC_TOKEN, $p->get_token_type() ); + $this->assertSame( '-->', $p->get_token_value() ); + } + + /** + * Tests that `color: red;' ) ); + } + + // --- Bad tokens stripped --- + + public function test_bad_string_token_stripped() { + // An unescaped newline produces a bad-string-token for the partial string up to the newline. + // The remainder of the line (after the newline) is preserved as separate tokens, + // so we can only assert that the bad content is not present rather than the exact output. + $result = $this->sanitize( "content: \"bad\nstring\";" ); + $this->assertStringNotContainsString( 'bad', $result ); + } + + public function test_bad_url_token_stripped() { + // Whitespace inside url() produces a bad-url-token; the whole url(…) is stripped. + $this->assertSame( 'background-image: ;', $this->sanitize( 'background-image: url(bad url);' ) ); + } + + public function test_get_removed_tokens_reason_bad_string() { + $p = new WP_CSS_Token_Processor( "color: \"bad\nstring\";" ); + $p->sanitize(); + $removed = $p->get_removed_tokens(); + $this->assertNotEmpty( $removed, 'Expected bad_string token to be recorded' ); + $this->assertSame( 'bad_string', $removed[0]['reason'] ); + } + + public function test_get_removed_tokens_reason_bad_url() { + $p = new WP_CSS_Token_Processor( 'background: url(bad url);' ); + $p->sanitize(); + $removed = $p->get_removed_tokens(); + $this->assertNotEmpty( $removed, 'Expected bad_url token to be recorded' ); + $this->assertSame( 'bad_url', $removed[0]['reason'] ); + } + + // --- URL protocol filtering --- + + public function test_url_with_javascript_protocol_stripped() { + // url(javascript:evil) is a URL_TOKEN; javascript: scheme is always stripped entirely. 
+ $this->assertSame( 'background: ;', $this->sanitize( 'background: url(javascript:evil);' ) ); + } + + public function test_url_with_data_protocol_stripped() { + $this->assertSame( 'background: ;', $this->sanitize( 'background: url(data:image/png;base64,abc);' ) ); + } + + public function test_url_with_https_survives() { + $css = 'background: url(https://example.com/image.png);'; + $this->assertSame( $css, $this->sanitize( $css ) ); + } + + public function test_url_with_relative_path_survives() { + $css = 'background: url(image.png);'; + $this->assertSame( $css, $this->sanitize( $css ) ); + } + + // --- At-rule allowlist --- + + public function test_allowed_at_rule_media_survives() { + $css = '@media (max-width: 768px) { color: red; }'; + $this->assertSame( $css, $this->sanitize( $css ) ); + } + + public function test_allowed_at_rule_supports_survives() { + $css = '@supports (display: grid) { color: red; }'; + $this->assertSame( $css, $this->sanitize( $css ) ); + } + + public function test_allowed_at_rule_keyframes_survives() { + $css = '@keyframes slide { from { opacity: 0; } to { opacity: 1; } }'; + $this->assertSame( $css, $this->sanitize( $css ) ); + } + + public function test_blocked_at_rule_import_stripped() { + $result = $this->sanitize( "@import url('https://evil.com/style.css'); color: red;" ); + $this->assertStringNotContainsString( '@import', $result ); + $this->assertStringContainsString( 'color: red;', $result ); + } + + public function test_blocked_at_rule_charset_stripped() { + $result = $this->sanitize( '@charset "UTF-8"; color: red;' ); + $this->assertStringNotContainsString( '@charset', $result ); + $this->assertStringContainsString( 'color: red;', $result ); + } + + public function test_unknown_at_rule_stripped() { + $result = $this->sanitize( '@unknown-future-rule { color: red; } .a { color: blue; }' ); + $this->assertStringNotContainsString( '@unknown-future-rule', $result ); + $this->assertStringContainsString( 'color: blue;', $result ); + } 
+ + public function test_allowed_at_rule_layer_survives() { + $css = '@layer utilities { color: red; }'; + $this->assertSame( $css, $this->sanitize( $css ) ); + } + + public function test_allowed_at_rule_container_survives() { + $css = '@container (width > 400px) { color: red; }'; + $this->assertSame( $css, $this->sanitize( $css ) ); + } + + public function test_allowed_at_rule_font_face_survives() { + $css = '@font-face { font-family: "My Font"; src: url(my-font.woff2); }'; + $this->assertSame( $css, $this->sanitize( $css ) ); + } + + public function test_blocked_at_rule_namespace_stripped() { + $result = $this->sanitize( '@namespace url(http://www.w3.org/1999/xhtml); color: red;' ); + $this->assertStringNotContainsString( '@namespace', $result ); + $this->assertStringContainsString( 'color: red;', $result ); + } + + public function test_allowed_at_rule_webkit_keyframes_survives() { + $css = '@-webkit-keyframes slide { from { opacity: 0; } to { opacity: 1; } }'; + $this->assertSame( $css, $this->sanitize( $css ) ); + } + + public function test_multiple_blocked_at_rules_all_stripped() { + $result = $this->sanitize( "@import 'evil.css'; @charset 'UTF-8'; color: red;" ); + $this->assertStringNotContainsString( '@import', $result ); + $this->assertStringNotContainsString( '@charset', $result ); + $this->assertStringContainsString( 'color: red;', $result ); + } + + // --- get_removed_tokens() --- + + public function test_get_removed_tokens_empty_when_nothing_stripped() { + $p = new WP_CSS_Token_Processor( 'color: red;' ); + $p->sanitize(); + $this->assertEmpty( $p->get_removed_tokens() ); + } + + public function test_get_removed_tokens_populated_after_strip() { + $p = new WP_CSS_Token_Processor( 'background: url(javascript:alert(1));' ); + $p->sanitize(); + $removed = $p->get_removed_tokens(); + $this->assertNotEmpty( $removed ); + $this->assertArrayHasKey( 'token', $removed[0] ); + $this->assertArrayHasKey( 'reason', $removed[0] ); + } + + public function 
test_get_removed_tokens_contains_correct_reason_for_html_comment() { + $p = new WP_CSS_Token_Processor( 'color: red;' ); + $this->assertWPError( $result ); + $this->assertSame( 'css_html_comment', $result->get_error_code() ); + } + + // --- Malformed tokens --- + + /** + * @covers ::validate + */ + public function test_bad_string_token_returns_wp_error_with_css_malformed_token() { + $result = $this->validate( "\"bad\nstring\"" ); + $this->assertWPError( $result ); + $this->assertSame( 'css_malformed_token', $result->get_error_code() ); + } + + /** + * @covers ::validate + */ + public function test_bad_url_token_returns_wp_error_with_css_malformed_token() { + $result = $this->validate( 'url(bad url)' ); + $this->assertWPError( $result ); + $this->assertSame( 'css_malformed_token', $result->get_error_code() ); + } + + // --- Unsafe URLs --- + + /** + * @covers ::validate + */ + public function test_javascript_url_returns_wp_error_with_css_unsafe_url() { + $result = $this->validate( 'url(javascript:evil)' ); + $this->assertWPError( $result ); + $this->assertSame( 'css_unsafe_url', $result->get_error_code() ); + } + + /** + * @covers ::validate + */ + public function test_data_url_returns_wp_error_with_css_unsafe_url() { + $result = $this->validate( 'url(data:image/png;base64,abc)' ); + $this->assertWPError( $result ); + $this->assertSame( 'css_unsafe_url', $result->get_error_code() ); + } + + // --- Disallowed at-rules --- + + /** + * @covers ::validate + */ + public function test_import_at_rule_returns_wp_error_with_css_disallowed_at_rule() { + $result = $this->validate( "@import url('evil.css');" ); + $this->assertWPError( $result ); + $this->assertSame( 'css_disallowed_at_rule', $result->get_error_code() ); + } + + /** + * @covers ::validate + */ + public function test_unknown_at_rule_returns_wp_error_with_css_disallowed_at_rule() { + $result = $this->validate( '@unknown { }' ); + $this->assertWPError( $result ); + $this->assertSame( 'css_disallowed_at_rule', 
$result->get_error_code() ); + } + + /** + * @covers ::validate + */ + public function test_media_at_rule_returns_true() { + $result = $this->validate( '@media (max-width: 768px) { color: red; }' ); + $this->assertTrue( $result ); + } + + // --- Guarantee: validate() === true implies sanitize() is a no-op --- + + /** + * validate() === true guarantees sanitize() is a no-op on the same input. + * + * @dataProvider data_validate_true_implies_sanitize_noop + * + * @covers ::validate + * @covers ::sanitize + */ + public function test_validate_true_implies_sanitize_noop( string $css ) { + $this->assertTrue( $this->validate( $css ), 'Expected validate() to return true for this fixture' ); + $sanitized = ( new WP_CSS_Token_Processor( $css ) )->sanitize(); + $this->assertSame( $css, $sanitized, 'validate() returning true must mean sanitize() is a no-op' ); + } + + /** + * Data provider for the validate() === true implies sanitize() is a no-op guarantee. + * + * @return array + */ + public function data_validate_true_implies_sanitize_noop(): array { + return array( + 'simple declaration' => array( 'color: red;' ), + 'nesting ampersand' => array( 'color: blue; & p { color: red; }' ), + 'child combinator' => array( '& > p { margin: 0; }' ), + 'media query' => array( '@media (max-width: 768px) { color: red; }' ), + 'custom property' => array( '--my-color: #ff0000;' ), + 'var() usage' => array( 'color: var(--my-color);' ), + 'empty string' => array( '' ), + 'https url' => array( 'background: url(https://example.com/image.png);' ), + 'relative url' => array( 'background: url(image.png);' ), + ); + } +}