diff --git a/src/parable.js b/src/parable.js index b23c512a..39f63d2c 100644 --- a/src/parable.js +++ b/src/parable.js @@ -88,20 +88,136 @@ function _countConsecutiveDollarsBefore(s, pos) { return count; } +class QuoteState { + constructor() { + this.single = false; + this.double = false; + this._stack = []; + } + + toggleSingle() { + if (!this.double) { + this.single = !this.single; + } + } + + toggleDouble() { + if (!this.single) { + this.double = !this.double; + } + } + + push() { + this._stack.push([this.single, this.double]); + this.single = false; + this.double = false; + } + + pop() { + if (this._stack) { + [this.single, this.double] = this._stack.pop(); + } + } + + inQuotes() { + return this.single || this.double; + } + + processChar(c, prev_escaped) { + if (prev_escaped == null) { + prev_escaped = false; + } + if (prev_escaped) { + return; + } + if (c === "'" && !this.double) { + this.single = !this.single; + } else if (c === '"' && !this.single) { + this.double = !this.double; + } + } + + copy() { + let qs; + qs = new QuoteState(); + qs.single = this.single; + qs.double = this.double; + qs._stack = Array.from(this._stack); + return qs; + } + + outerDouble() { + if (this._stack.length === 0) { + return false; + } + return this._stack[this._stack.length - 1][1]; + } + + getDepth() { + return this._stack.length; + } +} + +class ParseContext { + // Context kind constants + NORMAL = 0; + COMMAND_SUB = 1; + ARITHMETIC = 2; + CASE_PATTERN = 3; + BRACE_EXPANSION = 4; + constructor(kind) { + if (kind == null) { + kind = 0; + } + this.kind = kind; + this.paren_depth = 0; + this.brace_depth = 0; + this.bracket_depth = 0; + this.quote = new QuoteState(); + } +} + +class ContextStack { + constructor() { + this._stack = [new ParseContext()]; + } + + getCurrent() { + return this._stack[this._stack.length - 1]; + } + + push(kind) { + this._stack.push(new ParseContext(kind)); + } + + pop() { + if (this._stack.length > 1) { + return this._stack.pop(); + } + return this._stack[0]; + } + + inContext(kind) { + let ctx; + for (ctx of this._stack) { + if (ctx.kind === kind) { + return true; + } + } + return false; + } + + getDepth() { + return this._stack.length; + } +} + function _stripLineContinuationsCommentAware(text) { - let c, - i, - in_comment, - in_double, - in_single, - j, - num_preceding_backslashes, - result; + let c, i, in_comment, j, num_preceding_backslashes, quote, result; result = []; i = 0; in_comment = false; - in_single = false; - in_double = false; + quote = new QuoteState(); while (i < text.length) { c = text[i]; if (c === "\\" && i + 1 < text.length && text[i + 1] === "\n") { @@ -132,11 +248,11 @@ function _stripLineContinuationsCommentAware(text) { i += 1; continue; } - if (c === "'" && !in_double && !in_comment) { - in_single = !in_single; - } else if (c === '"' && !in_single && !in_comment) { - in_double = !in_double; - } else if (c === "#" && !in_single && !in_comment) { + if (c === "'" && !quote.double && !in_comment) { + quote.single = !quote.single; + } else if (c === '"' && !quote.single && !in_comment) { + quote.double = !quote.double; + } else if (c === "#" && !quote.single && !in_comment) { in_comment = true; } result.push(c); @@ -213,22 +329,21 @@ class Word extends Node { } _doubleCtlescSmart(value) { - let bs_count, c, in_double, in_single, j, result; + let bs_count, c, j, quote, result; result = []; - in_single = false; - in_double = false; + quote = new QuoteState(); for (c of value) { // Track quote state - if (c === "'" && !in_double) { - in_single = !in_single; - } else if (c === '"' && !in_single) { - in_double = !in_double; + if (c === "'" && !quote.double) { + quote.single = !quote.single; + } else if (c === '"' && !quote.single) { + quote.double = !quote.double; } result.push(c); if (c === "") { // Only count backslashes in double-quoted context (where they escape) // In single quotes, backslashes are literal, so always double CTLESC - if (in_double) { + if (quote.double) { bs_count = 0; for (j = result.length - 2; j > -1; j--) { if (result[j] === "\\") { @@ -250,27 +365,26 @@ class Word extends Node { } _normalizeParamExpansionNewlines(value) { - let c, ch, depth, had_leading_newline, i, in_double, in_single, result; + let c, ch, depth, had_leading_newline, i, quote, result; result = []; i = 0; - in_single = false; - in_double = false; + quote = new QuoteState(); while (i < value.length) { c = value[i]; // Track quote state - if (c === "'" && !in_double) { - in_single = !in_single; + if (c === "'" && !quote.double) { + quote.single = !quote.single; result.push(c); i += 1; - } else if (c === '"' && !in_single) { - in_double = !in_double; + } else if (c === '"' && !quote.single) { + quote.double = !quote.double; result.push(c); i += 1; } else if ( c === "$" && i + 1 < value.length && value[i + 1] === "{" && - !in_single + !quote.single ) { // Check for ${ param expansion result.push("$"); @@ -286,7 +400,7 @@ class Word extends Node { depth = 1; while (i < value.length && depth > 0) { ch = value[i]; - if (ch === "\\" && i + 1 < value.length && !in_single) { + if (ch === "\\" && i + 1 < value.length && !quote.single) { if (value[i + 1] === "\n") { i += 2; continue; @@ -296,11 +410,11 @@ class Word extends Node { i += 2; continue; } - if (ch === "'" && !in_double) { - in_single = !in_single; - } else if (ch === '"' && !in_single) { - in_double = !in_double; - } else if (!in_single && !in_double) { + if (ch === "'" && !quote.double) { + quote.single = !quote.single; + } else if (ch === '"' && !quote.single) { + quote.double = !quote.double; + } else if (!quote.inQuotes()) { if (ch === "{") { depth += 1; } else if (ch === "}") { @@ -514,9 +628,7 @@ class Word extends Node { first_char, i, in_backtick, - in_double_quote, in_pattern, - in_single_quote, inner, is_ansi_c, j, @@ -524,22 +636,20 @@ class Word extends Node { op, op_start, outer_in_dquote, - quote_stack, + quote, rest, result, result_str, var_name_len; result = []; i = 0; - in_single_quote = false; - in_double_quote = false; + quote = new QuoteState(); in_backtick = false; brace_depth = 0; - quote_stack = []; while (i < value.length) { ch = value[i]; // Track backtick context - don't expand $'...' inside backticks - if (ch === "`" && !in_single_quote) { + if (ch === "`" && !quote.single) { in_backtick = !in_backtick; result.push(ch); i += 1; @@ -558,52 +668,48 @@ class Word extends Node { continue; } // Track brace depth for parameter expansions - if (!in_single_quote) { + if (!quote.single) { if (_startsWithAt(value, i, "${")) { brace_depth += 1; - quote_stack.push([in_single_quote, in_double_quote]); - in_single_quote = false; - in_double_quote = false; + quote.push(); result.push("${"); i += 2; continue; - } else if (ch === "}" && brace_depth > 0 && !in_double_quote) { + } else if (ch === "}" && brace_depth > 0 && !quote.double) { brace_depth -= 1; result.push(ch); - if (quote_stack) { - [in_single_quote, in_double_quote] = quote_stack.pop(); - } + quote.pop(); i += 1; continue; } } // Double quotes inside ${...} still protect $'...' from expansion - effective_in_dquote = in_double_quote; + effective_in_dquote = quote.double; // Track quote state to avoid matching $' inside regular quotes if (ch === "'" && !effective_in_dquote) { // Toggle quote state unless this is $' that will be expanded as ANSI-C is_ansi_c = - !in_single_quote && + !quote.single && i > 0 && value[i - 1] === "$" && _countConsecutiveDollarsBefore(value, i - 1) % 2 === 0; if (!is_ansi_c) { - in_single_quote = !in_single_quote; + quote.single = !quote.single; } result.push(ch); i += 1; - } else if (ch === '"' && !in_single_quote) { - in_double_quote = !in_double_quote; + } else if (ch === '"' && !quote.single) { + quote.double = !quote.double; result.push(ch); i += 1; - } else if (ch === "\\" && i + 1 < value.length && !in_single_quote) { + } else if (ch === "\\" && i + 1 < value.length && !quote.single) { // Backslash escape - skip both chars to avoid misinterpreting \" or \' result.push(ch); result.push(value[i + 1]); i += 2; } else if ( _startsWithAt(value, i, "$'") && - !in_single_quote && + !quote.single && !effective_in_dquote && _countConsecutiveDollarsBefore(value, i) % 2 === 0 ) { @@ -624,10 +730,7 @@ class Word extends Node { // Strip the $ and expand escapes expanded = this._expandAnsiCEscapes(ansi_str.slice(1, ansi_str.length)); // Inside ${...} that's itself in double quotes, check if quotes should be stripped - outer_in_dquote = - quote_stack.length > 0 - ? quote_stack[quote_stack.length - 1][1] - : false; + outer_in_dquote = quote.outerDouble(); if ( brace_depth > 0 && outer_in_dquote && @@ -1457,6 +1560,7 @@ class Word extends Node { _formatCommandSubstitutions(value, in_arith) { let arith_depth, arith_paren_depth, + brace_quote, c, cmdsub_idx, cmdsub_parts, @@ -1475,15 +1579,13 @@ class Word extends Node { has_untracked_procsub, i, idx, - in_double, - in_double_quote, - in_single, inner, is_procsub, j, leading_brace, leading_ws, leading_ws_end, + main_quote, node, normalized_ws, p, @@ -1496,6 +1598,7 @@ class Word extends Node { raw_stripped, rest, result, + scan_quote, spaced, stripped, terminator; @@ -1528,12 +1631,12 @@ class Word extends Node { has_untracked_cmdsub = false; has_untracked_procsub = false; idx = 0; - in_double = false; + scan_quote = new QuoteState(); while (idx < value.length) { if (value[idx] === '"') { - in_double = !in_double; + scan_quote.double = !scan_quote.double; idx += 1; - } else if (value[idx] === "'" && !in_double) { + } else if (value[idx] === "'" && !scan_quote.double) { // Skip over single-quoted string (contents are literal) // But only when not inside double quotes idx += 1; @@ -1553,7 +1656,7 @@ class Word extends Node { break; } else if ( (_startsWithAt(value, idx, "<(") || _startsWithAt(value, idx, ">(")) && - !in_double + !scan_quote.double ) { // Only treat as process substitution if not preceded by alphanumeric or quote // (e.g., "i<(3)" is arithmetic comparison, not process substitution) @@ -1588,7 +1691,7 @@ class Word extends Node { i = 0; cmdsub_idx = 0; procsub_idx = 0; - in_double_quote = false; + main_quote = new QuoteState(); extglob_depth = 0; deprecated_arith_depth = 0; arith_depth = 0; @@ -1767,7 +1870,7 @@ class Word extends Node { i = j; } else if ( (_startsWithAt(value, i, ">(") || _startsWithAt(value, i, "<(")) && - !in_double_quote && + !main_quote.double && deprecated_arith_depth === 0 && arith_depth === 0 ) { @@ -1964,19 +2067,18 @@ class Word extends Node { // Find matching close brace, respecting nesting, quotes, and cmdsubs j = i + 2; depth = 1; - in_single = false; - in_double = false; + brace_quote = new QuoteState(); while (j < value.length && depth > 0) { c = value[j]; - if (c === "\\" && j + 1 < value.length && !in_single) { + if (c === "\\" && j + 1 < value.length && !brace_quote.single) { j += 2; continue; } - if (c === "'" && !in_double) { - in_single = !in_single; - } else if (c === '"' && !in_single) { - in_double = !in_double; - } else if (!in_single && !in_double) { + if (c === "'" && !brace_quote.double) { + brace_quote.single = !brace_quote.single; + } else if (c === '"' && !brace_quote.single) { + brace_quote.double = !brace_quote.double; + } else if (!brace_quote.inQuotes()) { // Skip over $(...) command substitutions if ( _startsWithAt(value, j, "$(") && @@ -2013,10 +2115,10 @@ class Word extends Node { i = j; } else if (value[i] === '"') { // Track double-quote state (single quotes inside double quotes are literal) - in_double_quote = !in_double_quote; + main_quote.double = !main_quote.double; result.push(value[i]); i += 1; - } else if (value[i] === "'" && !in_double_quote) { + } else if (value[i] === "'" && !main_quote.double) { // Skip single-quoted strings (contents are literal, don't look for cmdsubs) // But only when NOT inside double quotes (where single quotes are literal) j = i + 1; @@ -2040,21 +2142,21 @@ class Word extends Node { let current_part, deprecated_arith_depth, depth, + extglob_quote, has_pipe, i, - in_double_quote, part_content, pattern_parts, prefix_char, result; result = []; i = 0; - in_double_quote = false; + extglob_quote = new QuoteState(); deprecated_arith_depth = 0; while (i < value.length) { // Track double-quote state if (value[i] === '"') { - in_double_quote = !in_double_quote; + extglob_quote.double = !extglob_quote.double; result.push(value[i]); i += 1; continue; @@ -2078,7 +2180,7 @@ class Word extends Node { prefix_char = value[i]; if ( "><".includes(prefix_char) && - !in_double_quote && + !extglob_quote.double && deprecated_arith_depth === 0 ) { // Found pattern start @@ -4479,6 +4581,38 @@ function _skipBacktick(value, start) { return i; } +function _isValidArithmeticStart(value, start) { + let scan_c, scan_i, scan_paren; + scan_paren = 0; + scan_i = start + 3; + while (scan_i < value.length) { + scan_c = value[scan_i]; + // Skip over $( command subs - their parens shouldn't count + if ( + scan_c === "$" && + scan_i + 1 < value.length && + value[scan_i + 1] === "(" + ) { + scan_i = _findCmdsubEnd(value, scan_i + 2); + continue; + } + if (scan_c === "(") { + scan_paren += 1; + } else if (scan_c === ")") { + if (scan_paren > 0) { + scan_paren -= 1; + } else if (scan_i + 1 < value.length && value[scan_i + 1] === ")") { + return true; + } else { + // Single ) at top level without following ) - not valid arithmetic + return false; + } + } + scan_i += 1; + } + return false; +} + function _findCmdsubEnd(value, start) { let arith_depth, arith_paren_depth, @@ -4487,17 +4621,11 @@ function _findCmdsubEnd(value, start) { depth, i, in_case_patterns, - in_double, - in_single, - is_valid_arith, j, - scan_c, - scan_i, - scan_paren; + quote; depth = 1; i = start; - in_single = false; - in_double = false; + quote = new QuoteState(); case_depth = 0; in_case_patterns = false; arith_depth = 0; @@ -4505,26 +4633,26 @@ function _findCmdsubEnd(value, start) { while (i < value.length && depth > 0) { c = value[i]; // Handle escapes - if (c === "\\" && i + 1 < value.length && !in_single) { + if (c === "\\" && i + 1 < value.length && !quote.single) { i += 2; continue; } // Handle quotes - if (c === "'" && !in_double) { - in_single = !in_single; + if (c === "'" && !quote.double) { + quote.single = !quote.single; i += 1; continue; } - if (c === '"' && !in_single) { - in_double = !in_double; + if (c === '"' && !quote.single) { + quote.double = !quote.double; i += 1; continue; } - if (in_single) { + if (quote.single) { i += 1; continue; } - if (in_double) { + if (quote.double) { // Inside double quotes, $() command substitution is still active if (_startsWithAt(value, i, "$(") && !_startsWithAt(value, i, "$((")) { // Recursively find end of nested command substitution @@ -4594,43 +4722,13 @@ function _findCmdsubEnd(value, start) { continue; } // Handle arithmetic expressions $(( - // Check for valid arithmetic by scanning for closing )) at top level if (_startsWithAt(value, i, "$((")) { - is_valid_arith = true; - scan_paren = 0; - scan_i = i + 3; - while (scan_i < value.length) { - scan_c = value[scan_i]; - // Skip over $( command subs - their parens shouldn't count - if ( - scan_c === "$" && - scan_i + 1 < value.length && - value[scan_i + 1] === "(" - ) { - scan_i = _findCmdsubEnd(value, scan_i + 2); - continue; - } - if (scan_c === "(") { - scan_paren += 1; - } else if (scan_c === ")") { - if (scan_paren > 0) { - scan_paren -= 1; - } else if (scan_i + 1 < value.length && value[scan_i + 1] === ")") { - break; - } else { - // Single ) at top level without following ) - not valid arithmetic - is_valid_arith = false; - break; - } - } - scan_i += 1; - } - if (is_valid_arith) { + if (_isValidArithmeticStart(value, i)) { arith_depth += 1; i += 3; continue; } - // else: not valid arithmetic, treat $( as nested cmdsub and ( as paren + // Not valid arithmetic, treat $( as nested cmdsub and ( as paren j = _findCmdsubEnd(value, i + 2); i = j; continue; @@ -5457,6 +5555,8 @@ class Parser { this._cmdsub_heredoc_end = null; this._saw_newline_in_single_quote = false; this._in_process_sub = in_process_sub; + // Context stack for tracking nested parsing scopes + this._ctx = new ContextStack(); } atEnd() { @@ -5480,6 +5580,26 @@ class Parser { return ch; } + peekAt(offset) { + let pos; + pos = this.pos + offset; + if (pos < 0 || pos >= this.length) { + return ""; + } + return this.source[pos]; + } + + lookahead(n) { + return this.source.slice(this.pos, this.pos + n); + } + + matchKeyword(keyword) { + if (!_startsWithAt(this.source, this.pos, keyword)) { + return false; + } + return _isWordBoundary(this.source, this.pos, keyword.length); + } + _isBangFollowedByProcsub() { let next_char; if (this.pos + 2 >= this.length) { @@ -5503,11 +5623,7 @@ class Parser { while (!this.atEnd() && this.peek() !== "\n") { this.advance(); } - } else if ( - ch === "\\" && - this.pos + 1 < this.length && - this.source[this.pos + 1] === "\n" - ) { + } else if (ch === "\\" && this.peekAt(1) === "\n") { // Backslash-newline is line continuation - skip both this.advance(); this.advance(); @@ -5540,11 +5656,7 @@ class Parser { while (!this.atEnd() && this.peek() !== "\n") { this.advance(); } - } else if ( - ch === "\\" && - this.pos + 1 < this.length && - this.source[this.pos + 1] === "\n" - ) { + } else if (ch === "\\" && this.peekAt(1) === "\n") { // Backslash-newline is line continuation - skip both this.advance(); this.advance(); @@ -6937,7 +7049,7 @@ class Parser { } _isAssignmentWord(word) { - let bracket_depth, ch, i, in_double, in_single; + let bracket_depth, ch, i, quote; // Assignment must start with identifier (letter or underscore), not quoted if ( !word.value || @@ -6945,33 +7057,26 @@ class Parser { ) { return false; } - in_single = false; - in_double = false; + quote = new QuoteState(); bracket_depth = 0; i = 0; while (i < word.value.length) { ch = word.value[i]; - if (ch === "'" && !in_double) { - in_single = !in_single; - } else if (ch === '"' && !in_single) { - in_double = !in_double; - } else if (ch === "\\" && !in_single && i + 1 < word.value.length) { + if (ch === "'" && !quote.double) { + quote.single = !quote.single; + } else if (ch === '"' && !quote.single) { + quote.double = !quote.double; + } else if (ch === "\\" && !quote.single && i + 1 < word.value.length) { i += 1; continue; - } else if (ch === "[" && !in_single && !in_double) { + } else if (ch === "[" && !quote.inQuotes()) { bracket_depth += 1; - } else if (ch === "]" && !in_single && !in_double) { + } else if (ch === "]" && !quote.inQuotes()) { bracket_depth -= 1; - } else if ( - ch === "=" && - !in_single && - !in_double && - bracket_depth === 0 - ) { + } else if (ch === "=" && !quote.inQuotes() && bracket_depth === 0) { return true; } else if ( - !in_single && - !in_double && + !quote.inQuotes() && bracket_depth === 0 && !(/^[a-zA-Z0-9]$/.test(ch) || ch === "_") ) { @@ -8902,17 +9007,15 @@ class Parser { depth, dollar_count, formatted, - in_double_inner, - in_double_quote, - in_single, - in_single_quote, inner, + inner_quote, next_c, op, param, paren_depth, parsed, pc, + quote, sub_parser, suffix, text, @@ -9063,18 +9166,17 @@ class Parser { // Must track quotes - inside subscripts, quotes span until closed depth = 1; content_chars = []; - in_single = false; - in_double_inner = false; + inner_quote = new QuoteState(); while (!this.atEnd() && depth > 0) { c = this.peek(); - if (in_single) { + if (inner_quote.single) { content_chars.push(this.advance()); if (c === "'") { - in_single = false; + inner_quote.single = false; } continue; } - if (in_double_inner) { + if (inner_quote.double) { if (c === "\\" && this.pos + 1 < this.length) { content_chars.push(this.advance()); if (!this.atEnd()) { @@ -9084,17 +9186,17 @@ class Parser { } content_chars.push(this.advance()); if (c === '"') { - in_double_inner = false; + inner_quote.double = false; } continue; } if (c === "'") { - in_single = true; + inner_quote.single = true; content_chars.push(this.advance()); continue; } if (c === '"') { - in_double_inner = true; + inner_quote.double = true; content_chars.push(this.advance()); continue; } @@ -9226,19 +9328,18 @@ class Parser { // Track quote state and nesting arg_chars = []; depth = 1; - in_single_quote = false; - in_double_quote = false; + quote = new QuoteState(); while (!this.atEnd() && depth > 0) { c = this.peek(); // Single quotes - no escapes, just scan to closing quote - if (c === "'" && !in_double_quote) { - in_single_quote = !in_single_quote; + if (c === "'" && !quote.double) { + quote.single = !quote.single; arg_chars.push(this.advance()); - } else if (c === '"' && !in_single_quote) { + } else if (c === '"' && !quote.single) { // Double quotes - toggle state - in_double_quote = !in_double_quote; + quote.double = !quote.double; arg_chars.push(this.advance()); - } else if (c === "\\" && !in_single_quote) { + } else if (c === "\\" && !quote.single) { // Escape - skip next char (line continuation removes both) if (this.pos + 1 < this.length && this.source[this.pos + 1] === "\n") { // Line continuation - skip both backslash and newline @@ -9252,7 +9353,7 @@ class Parser { } } else if ( c === "$" && - !in_single_quote && + !quote.single && this.pos + 1 < this.length && this.source[this.pos + 1] === "{" ) { @@ -9262,7 +9363,7 @@ class Parser { arg_chars.push(this.advance()); } else if ( c === "$" && - !in_single_quote && + !quote.single && this.pos + 1 < this.length && this.source[this.pos + 1] === "'" ) { @@ -9285,8 +9386,8 @@ class Parser { } } else if ( c === "$" && - !in_single_quote && - !in_double_quote && + !quote.single && + !quote.double && this.pos + 1 < this.length && this.source[this.pos + 1] === '"' ) { @@ -9297,7 +9398,7 @@ class Parser { if (dollar_count % 2 === 1) { // Odd count: locale string $"..." - strip the $ and enter double quote this.advance(); - in_double_quote = true; + quote.double = true; arg_chars.push(this.advance()); } else { // Even count: this $ is part of $$ (PID), keep it @@ -9305,7 +9406,7 @@ class Parser { } } else if ( c === "$" && - !in_single_quote && + !quote.single && this.pos + 1 < this.length && this.source[this.pos + 1] === "(" ) { @@ -9328,7 +9429,7 @@ class Parser { } arg_chars.push(this.advance()); } - } else if (c === "`" && !in_single_quote) { + } else if (c === "`" && !quote.single) { // Backtick command substitution - scan to matching ` backtick_start = this.pos; arg_chars.push(this.advance()); @@ -9348,10 +9449,10 @@ class Parser { arg_chars.push(this.advance()); } else if (c === "}") { // Closing brace - handle depth for nested ${...} - if (in_single_quote) { + if (quote.single) { // Inside single quotes, } is literal arg_chars.push(this.advance()); - } else if (in_double_quote) { + } else if (quote.double) { // Inside double quotes, } can close nested ${...} if (depth > 1) { depth -= 1; @@ -9395,38 +9496,37 @@ class Parser { } _paramSubscriptHasClose(start_pos) { - let c, depth, i, in_double, in_single; + let c, depth, i, quote; depth = 1; i = start_pos + 1; - in_single = false; - in_double = false; + quote = new QuoteState(); while (i < this.length) { c = this.source[i]; - if (in_single) { + if (quote.single) { if (c === "'") { - in_single = false; + quote.single = false; } i += 1; continue; } - if (in_double) { + if (quote.double) { if (c === "\\" && i + 1 < this.length) { i += 2; continue; } if (c === '"') { - in_double = false; + quote.double = false; } i += 1; continue; } if (c === "'") { - in_single = true; + quote.single = true; i += 1; continue; } if (c === '"') { - in_double = true; + quote.double = true; i += 1; continue; } @@ -9451,7 +9551,7 @@ class Parser { } _consumeParamName() { - let bracket_depth, c, ch, in_double_sub, in_single, name_chars, sc; + let bracket_depth, c, ch, name_chars, sc, subscript_quote; if (this.atEnd()) { return null; } @@ -9491,18 +9591,17 @@ class Parser { // Array subscript - track bracket depth and quotes name_chars.push(this.advance()); bracket_depth = 1; - in_single = false; - in_double_sub = false; + subscript_quote = new QuoteState(); while (!this.atEnd() && bracket_depth > 0) { sc = this.peek(); - if (in_single) { + if (subscript_quote.single) { name_chars.push(this.advance()); if (sc === "'") { - in_single = false; + subscript_quote.single = false; } continue; } - if (in_double_sub) { + if (subscript_quote.double) { if (sc === "\\" && this.pos + 1 < this.length) { name_chars.push(this.advance()); if (!this.atEnd()) { @@ -9512,12 +9611,12 @@ class Parser { } name_chars.push(this.advance()); if (sc === '"') { - in_double_sub = false; + subscript_quote.double = false; } continue; } if (sc === "'") { - in_single = true; + subscript_quote.single = true; name_chars.push(this.advance()); continue; } @@ -9528,12 +9627,12 @@ class Parser { ) { // Locale string $"..." - strip the $ and enter double quote this.advance(); - in_double_sub = true; + subscript_quote.double = true; name_chars.push(this.advance()); continue; } if (sc === '"') { - in_double_sub = true; + subscript_quote.double = true; name_chars.push(this.advance()); continue; } diff --git a/src/parable.py b/src/parable.py index a5553aee..97ebf2bc 100644 --- a/src/parable.py +++ b/src/parable.py @@ -111,6 +111,136 @@ def _repeat_str(s: str, n: int) -> str: return "".join(result) +class QuoteState: + """Unified quote state tracker for parsing. + + Tracks single and double quote state, with stack support for nested contexts + like command substitutions inside parameter expansions. + """ + + def __init__(self): + self.single = False + self.double = False + self._stack: list[tuple[bool, bool]] = [] + + def toggle_single(self) -> None: + """Toggle single quote state if not inside double quotes.""" + if not self.double: + self.single = not self.single + + def toggle_double(self) -> None: + """Toggle double quote state if not inside single quotes.""" + if not self.single: + self.double = not self.double + + def push(self) -> None: + """Push current state onto stack and reset for nested context.""" + self._stack.append((self.single, self.double)) + self.single = False + self.double = False + + def pop(self) -> None: + """Restore quote state from stack.""" + if self._stack: + self.single, self.double = self._stack.pop() + + def in_quotes(self) -> bool: + """Return True if inside any quotes.""" + return self.single or self.double + + def process_char(self, c: str, prev_escaped: bool = False) -> None: + """Process a character, updating quote state. + + Args: + c: The character to process + prev_escaped: True if character is preceded by an unescaped backslash + """ + if prev_escaped: + return + if c == "'" and not self.double: + self.single = not self.single + elif c == '"' and not self.single: + self.double = not self.double + + def copy(self) -> "QuoteState": + """Create a copy of this quote state.""" + qs = QuoteState() + qs.single = self.single + qs.double = self.double + qs._stack = list(self._stack) + return qs + + def outer_double(self) -> bool: + """Return True if the outer (parent) context is in double quotes.""" + if len(self._stack) == 0: + return False + return self._stack[len(self._stack) - 1][1] + + def get_depth(self) -> int: + """Return the current stack depth.""" + return len(self._stack) + + +class ParseContext: + """Context for parsing state within a specific scope. + + Tracks context type, nesting depths, and quote state for a single parsing scope. + Used with ContextStack to manage nested contexts like command substitutions, + arithmetic expressions, and case patterns. + """ + + # Context kind constants + NORMAL = 0 + COMMAND_SUB = 1 + ARITHMETIC = 2 + CASE_PATTERN = 3 + BRACE_EXPANSION = 4 + + def __init__(self, kind: int = 0): + self.kind = kind + self.paren_depth = 0 + self.brace_depth = 0 + self.bracket_depth = 0 + self.quote = QuoteState() + + +class ContextStack: + """Stack of parsing contexts for tracking nested scopes. + + Maintains a stack of ParseContext objects to handle nested structures like + command substitutions inside arithmetic expressions inside case patterns. + Always has at least one context (NORMAL) on the stack. + """ + + def __init__(self): + self._stack: list[ParseContext] = [ParseContext()] + + def get_current(self) -> ParseContext: + """Return the current (topmost) context.""" + return self._stack[len(self._stack) - 1] + + def push(self, kind: int) -> None: + """Push a new context onto the stack.""" + self._stack.append(ParseContext(kind)) + + def pop(self) -> ParseContext: + """Pop and return the top context. Never pops the base context.""" + if len(self._stack) > 1: + return self._stack.pop() + return self._stack[0] + + def in_context(self, kind: int) -> bool: + """Return True if any context in the stack has the given kind.""" + for ctx in self._stack: + if ctx.kind == kind: + return True + return False + + def get_depth(self) -> int: + """Return the current stack depth.""" + return len(self._stack) + + def _strip_line_continuations_comment_aware(text: str) -> str: """Strip backslash-newline line continuations, preserving newlines in comments. @@ -120,8 +250,7 @@ def _strip_line_continuations_comment_aware(text: str) -> str: result = [] i = 0 in_comment = False - in_single = False - in_double = False + quote = QuoteState() while i < len(text): c = text[i] if c == "\\" and i + 1 < len(text) and text[i + 1] == "\n": @@ -147,11 +276,11 @@ def _strip_line_continuations_comment_aware(text: str) -> str: result.append(c) i += 1 continue - if c == "'" and not in_double and not in_comment: - in_single = not in_single - elif c == '"' and not in_single and not in_comment: - in_double = not in_double - elif c == "#" and not in_single and not in_comment: + if c == "'" and not quote.double and not in_comment: + quote.single = not quote.single + elif c == '"' and not quote.single and not in_comment: + quote.double = not quote.double + elif c == "#" and not quote.single and not in_comment: in_comment = True result.append(c) i += 1 @@ -226,19 +355,18 @@ def _append_with_ctlesc(self, result: bytearray, byte_val: int): def _double_ctlesc_smart(self, value: str) -> str: """Double CTLESC bytes unless escaped by backslash inside double quotes.""" result = [] - in_single = False - in_double = False + quote = QuoteState() for c in value: # Track quote state - if c == "'" and not in_double: - in_single = not in_single - elif c == '"' and not in_single: - in_double = not in_double + if c == "'" and not quote.double: + quote.single = not quote.single + elif c == '"' and not quote.single: + quote.double = not quote.double result.append(c) if c == "\x01": # Only count backslashes in double-quoted context (where they escape) # In single quotes, backslashes are literal, so always double CTLESC - if in_double: + if quote.double: bs_count = 0 for j in range(len(result) - 2, -1, -1): if result[j] == "\\": @@ -260,21 +388,20 @@ def _normalize_param_expansion_newlines(self, value: str) -> str: """ result = [] i = 0 - in_single = False - in_double = False + quote = QuoteState() while i < len(value): c = value[i] # Track quote state - if c == "'" and not in_double: - in_single = not in_single + if c == "'" and not quote.double: + quote.single = not quote.single result.append(c) i += 1 - elif c == '"' and not in_single: - in_double = not in_double + elif c == '"' and not quote.single: + quote.double = not quote.double result.append(c) i += 1 # Check for ${ param expansion - elif c == "$" and i + 1 < len(value) and value[i + 1] == "{" and not in_single: + elif c == "$" and i + 1 < len(value) and value[i + 1] == "{" and not quote.single: result.append("$") result.append("{") i += 2 @@ -287,7 +414,7 @@ def _normalize_param_expansion_newlines(self, value: str) -> str: depth = 1 while i < len(value) and depth > 0: ch = value[i] - if ch == "\\" and i + 1 < len(value) and not in_single: + if ch == "\\" and i + 1 < len(value) and not quote.single: if value[i + 1] == "\n": i += 2 continue @@ -295,11 +422,11 @@ def _normalize_param_expansion_newlines(self, value: str) -> str: result.append(value[i + 1]) i += 2 continue - if ch == "'" and not in_double: - in_single = not in_single - elif ch == '"' and not in_single: - in_double = not in_double - elif not in_single and not in_double: + if ch == "'" and not quote.double: + quote.single = not quote.single + elif ch == '"' and not quote.single: + quote.double = not quote.double + elif not quote.in_quotes(): if ch == "{": depth += 1 elif ch == "}": @@ -458,15 +585,13 @@ def _expand_all_ansi_c_quotes(self, value: str) -> str: """Find and expand ALL $'...' ANSI-C quoted strings in value.""" result = [] i = 0 - in_single_quote = False - in_double_quote = False + quote = QuoteState() in_backtick = False # Track backtick substitutions - don't expand inside brace_depth = 0 # Track ${...} nesting - inside braces, $'...' is expanded - quote_stack: list[tuple[bool, bool]] = [] while i < len(value): ch = value[i] # Track backtick context - don't expand $'...' inside backticks - if ch == "`" and not in_single_quote: + if ch == "`" and not quote.single: in_backtick = not in_backtick result.append(ch) i += 1 @@ -482,49 +607,46 @@ def _expand_all_ansi_c_quotes(self, value: str) -> str: i += 1 continue # Track brace depth for parameter expansions - if not in_single_quote: + if not quote.single: if _starts_with_at(value, i, "${"): brace_depth += 1 - quote_stack.append((in_single_quote, in_double_quote)) - in_single_quote = False - in_double_quote = False + quote.push() result.append("${") i += 2 continue - elif ch == "}" and brace_depth > 0 and not in_double_quote: + elif ch == "}" and brace_depth > 0 and not quote.double: brace_depth -= 1 result.append(ch) - if quote_stack: - in_single_quote, in_double_quote = quote_stack.pop() + quote.pop() i += 1 continue # Double quotes inside ${...} still protect $'...' from expansion - effective_in_dquote = in_double_quote + effective_in_dquote = quote.double # Track quote state to avoid matching $' inside regular quotes if ch == "'" and not effective_in_dquote: # Toggle quote state unless this is $' that will be expanded as ANSI-C is_ansi_c = ( - not in_single_quote + not quote.single and i > 0 and value[i - 1] == "$" and _count_consecutive_dollars_before(value, i - 1) % 2 == 0 ) if not is_ansi_c: - in_single_quote = not in_single_quote + quote.single = not quote.single result.append(ch) i += 1 - elif ch == '"' and not in_single_quote: - in_double_quote = not in_double_quote + elif ch == '"' and not quote.single: + quote.double = not quote.double result.append(ch) i += 1 - elif ch == "\\" and i + 1 < len(value) and not in_single_quote: + elif ch == "\\" and i + 1 < len(value) and not quote.single: # Backslash escape - skip both chars to avoid misinterpreting \" or \' result.append(ch) result.append(value[i + 1]) i += 2 elif ( _starts_with_at(value, i, "$'") - and not in_single_quote + and not quote.single and not effective_in_dquote and _count_consecutive_dollars_before(value, i) % 2 == 0 ): @@ -545,9 +667,7 @@ def _expand_all_ansi_c_quotes(self, value: str) -> str: _substring(ansi_str, 1, len(ansi_str)) ) # Pass 'hello\nworld' # Inside ${...} that's itself in double quotes, check if quotes should be stripped - outer_in_dquote = ( - quote_stack[len(quote_stack) - 1][1] if len(quote_stack) > 0 else False - ) + outer_in_dquote = quote.outer_double() if ( brace_depth > 0 and outer_in_dquote @@ -1188,12 +1308,12 @@ def _format_command_substitutions(self, value: str, in_arith: bool = False) -> s has_untracked_cmdsub = False has_untracked_procsub = False idx = 0 - in_double = False + scan_quote = QuoteState() while idx < len(value): if value[idx] == '"': - in_double = not in_double + scan_quote.double = not scan_quote.double idx += 1 - elif value[idx] == "'" and not in_double: + elif value[idx] == "'" and not scan_quote.double: # Skip over single-quoted string (contents are literal) # But only when not inside double quotes idx += 1 @@ -1211,7 +1331,7 @@ def _format_command_substitutions(self, value: str, in_arith: bool = False) -> s break elif ( _starts_with_at(value, idx, "<(") or _starts_with_at(value, idx, ">(") - ) and not in_double: + ) and not scan_quote.double: # Only treat as process substitution if not preceded by alphanumeric or quote # (e.g., "i<(3)" is arithmetic comparison, not process substitution) # Also don't treat as process substitution inside double quotes or after quotes @@ -1236,7 +1356,7 @@ def _format_command_substitutions(self, value: str, in_arith: bool = False) -> s i = 0 cmdsub_idx = 0 procsub_idx = 0 - in_double_quote = False + main_quote = QuoteState() extglob_depth = 0 deprecated_arith_depth = 0 # Track $[...] depth arith_depth = 0 # Track $((...)) depth @@ -1381,7 +1501,7 @@ def _format_command_substitutions(self, value: str, in_arith: bool = False) -> s # Check for >( or <( process substitution (not inside double quotes, $[...], or $((...))) elif ( (_starts_with_at(value, i, ">(") or _starts_with_at(value, i, "<(")) - and not in_double_quote + and not main_quote.double and deprecated_arith_depth == 0 and arith_depth == 0 ): @@ -1536,18 +1656,17 @@ def _format_command_substitutions(self, value: str, in_arith: bool = False) -> s # Find matching close brace, respecting nesting, quotes, and cmdsubs j = i + 2 depth = 1 - in_single = False - in_double = False + brace_quote = QuoteState() while j < len(value) and depth > 0: c = value[j] - if c == "\\" and j + 1 < len(value) and not in_single: + if c == "\\" and j + 1 < len(value) and not brace_quote.single: j += 2 continue - if c == "'" and not in_double: - in_single = not in_single - elif c == '"' and not in_single: - in_double = not in_double - elif not in_single and not in_double: + if c == "'" and not brace_quote.double: + brace_quote.single = not brace_quote.single + elif c == '"' and not brace_quote.single: + brace_quote.double = not brace_quote.double + elif not brace_quote.in_quotes(): # Skip over $(...) command substitutions if _starts_with_at(value, j, "$(") and not _starts_with_at(value, j, "$(("): j = _find_cmdsub_end(value, j + 2) @@ -1575,12 +1694,12 @@ def _format_command_substitutions(self, value: str, in_arith: bool = False) -> s i = j # Track double-quote state (single quotes inside double quotes are literal) elif value[i] == '"': - in_double_quote = not in_double_quote + main_quote.double = not main_quote.double result.append(value[i]) i += 1 # Skip single-quoted strings (contents are literal, don't look for cmdsubs) # But only when NOT inside double quotes (where single quotes are literal) - elif value[i] == "'" and not in_double_quote: + elif value[i] == "'" and not main_quote.double: j = i + 1 while j < len(value) and value[j] != "'": j += 1 @@ -1597,12 +1716,12 @@ def _normalize_extglob_whitespace(self, value: str) -> str: """Normalize whitespace around | in >() and <() patterns for regex contexts.""" result = [] i = 0 - in_double_quote = False + extglob_quote = QuoteState() deprecated_arith_depth = 0 # Track $[...] depth while i < len(value): # Track double-quote state if value[i] == '"': - in_double_quote = not in_double_quote + extglob_quote.double = not extglob_quote.double result.append(value[i]) i += 1 continue @@ -1621,7 +1740,7 @@ def _normalize_extglob_whitespace(self, value: str) -> str: # Only process these patterns when NOT inside double quotes or $[...] if i + 1 < len(value) and value[i + 1] == "(": prefix_char = value[i] - if prefix_char in "><" and not in_double_quote and deprecated_arith_depth == 0: + if prefix_char in "><" and not extglob_quote.double and deprecated_arith_depth == 0: # Found pattern start result.append(prefix_char) result.append("(") @@ -3746,6 +3865,35 @@ def _skip_backtick(value: str, start: int) -> int: return i +def _is_valid_arithmetic_start(value: str, start: int) -> bool: + """Check if $(( at position starts a valid arithmetic expression. + + Scans forward looking for )) at the top paren level (excluding nested $()). + Returns True if valid arithmetic, False if this is actually $( ( ... ) ) + (command substitution containing a subshell). + """ + scan_paren = 0 + scan_i = start + 3 # Skip past $(( + while scan_i < len(value): + scan_c = value[scan_i] + # Skip over $( command subs - their parens shouldn't count + if scan_c == "$" and scan_i + 1 < len(value) and value[scan_i + 1] == "(": + scan_i = _find_cmdsub_end(value, scan_i + 2) + continue + if scan_c == "(": + scan_paren += 1 + elif scan_c == ")": + if scan_paren > 0: + scan_paren -= 1 + elif scan_i + 1 < len(value) and value[scan_i + 1] == ")": + return True # Found )) at top level, valid arithmetic + else: + # Single ) at top level without following ) - not valid arithmetic + return False + scan_i += 1 + return False # Never found )) + + def _find_cmdsub_end(value: str, start: int) -> int: """Find the end of a $(...) command substitution, handling case statements. @@ -3753,8 +3901,7 @@ def _find_cmdsub_end(value: str, start: int) -> int: """ depth = 1 i = start - in_single = False - in_double = False + quote = QuoteState() case_depth = 0 # Track nested case statements in_case_patterns = False # After 'in' but before first ;; or esac arith_depth = 0 # Track nested arithmetic expressions @@ -3762,22 +3909,22 @@ def _find_cmdsub_end(value: str, start: int) -> int: while i < len(value) and depth > 0: c = value[i] # Handle escapes - if c == "\\" and i + 1 < len(value) and not in_single: + if c == "\\" and i + 1 < len(value) and not quote.single: i += 2 continue # Handle quotes - if c == "'" and not in_double: - in_single = not in_single + if c == "'" and not quote.double: + quote.single = not quote.single i += 1 continue - if c == '"' and not in_single: - in_double = not in_double + if c == '"' and not quote.single: + quote.double = not quote.double i += 1 continue - if in_single: + if quote.single: i += 1 continue - if in_double: + if quote.double: # Inside double quotes, $() command substitution is still active if _starts_with_at(value, i, "$(") and not _starts_with_at(value, i, "$(("): # Recursively find end of nested command substitution @@ -3836,34 +3983,12 @@ def _find_cmdsub_end(value: str, start: int) -> int: i += 1 continue # Handle arithmetic expressions $(( - # Check for valid arithmetic by scanning for closing )) at top level if _starts_with_at(value, i, "$(("): - is_valid_arith = True - scan_paren = 0 - scan_i = i + 3 - while scan_i < len(value): - scan_c = value[scan_i] - # Skip over $( command subs - their parens shouldn't count - if scan_c == "$" and scan_i + 1 < len(value) and value[scan_i + 1] == "(": - scan_i = _find_cmdsub_end(value, scan_i + 2) - continue - if scan_c == "(": - scan_paren += 1 - elif scan_c == ")": - if scan_paren > 0: - scan_paren -= 1 - elif scan_i + 1 < len(value) and value[scan_i + 1] == ")": - break # Found )) at top level, valid arithmetic - else: - # Single ) at top level without following ) - not valid arithmetic - is_valid_arith = False - break - scan_i += 1 - if is_valid_arith: + if _is_valid_arithmetic_start(value, i): arith_depth += 1 i += 3 continue - # else: not valid arithmetic, treat $( as nested cmdsub and ( as paren + # Not valid arithmetic, treat $( as nested cmdsub and ( as paren j = _find_cmdsub_end(value, i + 2) i = j continue @@ -4541,6 +4666,8 @@ def __init__(self, source: str, in_process_sub: bool = False): self._cmdsub_heredoc_end: int | None = None self._saw_newline_in_single_quote = False self._in_process_sub = in_process_sub + # Context stack for tracking nested parsing scopes + self._ctx = ContextStack() def at_end(self) -> bool: """Check if we've reached the end of input.""" @@ -4560,6 +4687,26 @@ def advance(self) -> str | None: self.pos += 1 return ch + def peek_at(self, offset: int) -> str: + """Peek at character at offset from current position. + + Returns empty string if position is out of bounds. + """ + pos = self.pos + offset + if pos < 0 or pos >= self.length: + return "" + return self.source[pos] + + def lookahead(self, n: int) -> str: + """Return next n characters without consuming.""" + return _substring(self.source, self.pos, self.pos + n) + + def match_keyword(self, keyword: str) -> bool: + """Check if current position matches keyword with word boundary.""" + if not _starts_with_at(self.source, self.pos, keyword): + return False + return _is_word_boundary(self.source, self.pos, len(keyword)) + def _is_bang_followed_by_procsub(self) -> bool: """Check if ! at current position is followed by >( or <( process substitution.""" if self.pos + 2 >= self.length: @@ -4579,7 +4726,7 @@ def skip_whitespace(self) -> None: # Skip comment to end of line (but not the newline itself) while not self.at_end() and self.peek() != "\n": self.advance() - elif ch == "\\" and self.pos + 1 < self.length and self.source[self.pos + 1] == "\n": + elif ch == "\\" and self.peek_at(1) == "\n": # Backslash-newline is line continuation - skip both self.advance() self.advance() @@ -4603,7 +4750,7 @@ def skip_whitespace_and_newlines(self) -> None: # Skip comment to end of line while not self.at_end() and self.peek() != "\n": self.advance() - elif ch == "\\" and self.pos + 1 < self.length and self.source[self.pos + 1] == "\n": + elif ch == "\\" and self.peek_at(1) == "\n": # Backslash-newline is line continuation - skip both self.advance() self.advance() @@ -5640,31 +5787,25 @@ def _is_assignment_word(self, word: "Word") -> bool: # Assignment must start with identifier (letter or underscore), not quoted if not word.value or not (word.value[0].isalpha() or word.value[0] == "_"): return False - in_single = False - in_double = False + quote = QuoteState() bracket_depth = 0 i = 0 while i < len(word.value): ch = word.value[i] - if ch == "'" and not in_double: - in_single = not in_single - elif ch == '"' and not in_single: - in_double = not in_double - elif ch == "\\" and not in_single and i + 1 < len(word.value): + if ch == "'" and not quote.double: + quote.single = not quote.single + elif ch == '"' and not quote.single: + quote.double = not quote.double + elif ch == "\\" and not quote.single and i + 1 < len(word.value): i += 1 # Skip next char continue - elif ch == "[" and not in_single and not in_double: + elif ch == "[" and not quote.in_quotes(): bracket_depth += 1 - elif ch == "]" and not in_single and not in_double: + elif ch == "]" and not quote.in_quotes(): bracket_depth -= 1 - elif ch == "=" and not in_single and not in_double and bracket_depth == 0: + elif ch == "=" and not quote.in_quotes() and bracket_depth == 0: return True - elif ( - not in_single - and not in_double - and bracket_depth == 0 - and not (ch.isalnum() or ch == "_") - ): + elif not quote.in_quotes() and bracket_depth == 0 and not (ch.isalnum() or ch == "_"): # Invalid char in identifier part before = return False i += 1 @@ -7394,16 +7535,15 @@ def _parse_braced_param(self, start: int) -> tuple[Node | None, str]: # Must track quotes - inside subscripts, quotes span until closed depth = 1 content_chars: list[str] = [] - in_single = False - in_double_inner = False + inner_quote = QuoteState() while not self.at_end() and depth > 0: c = self.peek() - if in_single: + if inner_quote.single: content_chars.append(self.advance()) if c == "'": - in_single = False + inner_quote.single = False continue - if in_double_inner: + if inner_quote.double: if c == "\\" and self.pos + 1 < self.length: content_chars.append(self.advance()) if not self.at_end(): @@ -7411,14 +7551,14 @@ def _parse_braced_param(self, start: int) -> tuple[Node | None, str]: continue content_chars.append(self.advance()) if c == '"': - in_double_inner = False + inner_quote.double = False continue if c == "'": - in_single = True + inner_quote.single = True content_chars.append(self.advance()) continue if c == '"': - in_double_inner = True + inner_quote.double = True content_chars.append(self.advance()) continue if c == "`": @@ -7523,20 +7663,19 @@ def _parse_braced_param(self, start: int) -> tuple[Node | None, str]: # Track quote state and nesting arg_chars = [] depth = 1 - in_single_quote = False - in_double_quote = False + quote = QuoteState() while not self.at_end() and depth > 0: c = self.peek() # Single quotes - no escapes, just scan to closing quote - if c == "'" and not in_double_quote: - in_single_quote = not in_single_quote + if c == "'" and not quote.double: + quote.single = not quote.single arg_chars.append(self.advance()) # Double quotes - toggle state - elif c == '"' and not in_single_quote: - in_double_quote = not in_double_quote + elif c == '"' and not quote.single: + quote.double = not quote.double arg_chars.append(self.advance()) # Escape - skip next char (line continuation removes both) - elif c == "\\" and not in_single_quote: + elif c == "\\" and not quote.single: if self.pos + 1 < self.length and self.source[self.pos + 1] == "\n": # Line continuation - skip both backslash and newline self.advance() @@ -7548,7 +7687,7 @@ def _parse_braced_param(self, start: int) -> tuple[Node | None, str]: # Nested ${...} - increase depth (outside single quotes) elif ( c == "$" - and not in_single_quote + and not quote.single and self.pos + 1 < self.length and self.source[self.pos + 1] == "{" ): @@ -7558,7 +7697,7 @@ def _parse_braced_param(self, start: int) -> tuple[Node | None, str]: # ANSI-C quoted string $'...' - scan to matching ' with escapes elif ( c == "$" - and not in_single_quote + and not quote.single and self.pos + 1 < self.length and self.source[self.pos + 1] == "'" ): @@ -7577,8 +7716,8 @@ def _parse_braced_param(self, start: int) -> tuple[Node | None, str]: # Locale string $"..." - strip $ and enter double quote elif ( c == "$" - and not in_single_quote - and not in_double_quote + and not quote.single + and not quote.double and self.pos + 1 < self.length and self.source[self.pos + 1] == '"' ): @@ -7587,7 +7726,7 @@ def _parse_braced_param(self, start: int) -> tuple[Node | None, str]: if dollar_count % 2 == 1: # Odd count: locale string $"..." - strip the $ and enter double quote self.advance() # skip $ - in_double_quote = True + quote.double = True arg_chars.append(self.advance()) # append " else: # Even count: this $ is part of $$ (PID), keep it @@ -7595,7 +7734,7 @@ def _parse_braced_param(self, start: int) -> tuple[Node | None, str]: # Command substitution $(...) - scan to matching ) elif ( c == "$" - and not in_single_quote + and not quote.single and self.pos + 1 < self.length and self.source[self.pos + 1] == "(" ): @@ -7615,7 +7754,7 @@ def _parse_braced_param(self, start: int) -> tuple[Node | None, str]: continue arg_chars.append(self.advance()) # Backtick command substitution - scan to matching ` - elif c == "`" and not in_single_quote: + elif c == "`" and not quote.single: backtick_start = self.pos arg_chars.append(self.advance()) # opening ` while not self.at_end() and self.peek() != "`": @@ -7630,10 +7769,10 @@ def _parse_braced_param(self, start: int) -> tuple[Node | None, str]: arg_chars.append(self.advance()) # closing ` # Closing brace - handle depth for nested ${...} elif c == "}": - if in_single_quote: + if quote.single: # Inside single quotes, } is literal arg_chars.append(self.advance()) - elif in_double_quote: + elif quote.double: # Inside double quotes, } can close nested ${...} if depth > 1: depth -= 1 @@ -7674,29 +7813,28 @@ def _param_subscript_has_close(self, start_pos: int) -> bool: """Check for a matching ] in a parameter subscript before closing }.""" depth = 1 i = start_pos + 1 - in_single = False - in_double = False + quote = QuoteState() while i < self.length: c = self.source[i] - if in_single: + if quote.single: if c == "'": - in_single = False + quote.single = False i += 1 continue - if in_double: + if quote.double: if c == "\\" and i + 1 < self.length: i += 2 continue if c == '"': - in_double = False + quote.double = False i += 1 continue if c == "'": - in_single = True + quote.single = True i += 1 continue if c == '"': - in_double = True + quote.double = True i += 1 continue if c == "\\": @@ -7748,16 +7886,15 @@ def _consume_param_name(self) -> str | None: # Array subscript - track bracket depth and quotes name_chars.append(self.advance()) bracket_depth = 1 - in_single = False - in_double_sub = False + subscript_quote = QuoteState() while not self.at_end() and bracket_depth > 0: sc = self.peek() - if in_single: + if subscript_quote.single: name_chars.append(self.advance()) if sc == "'": - in_single = False + subscript_quote.single = False continue - if in_double_sub: + if subscript_quote.double: if sc == "\\" and self.pos + 1 < self.length: name_chars.append(self.advance()) if not self.at_end(): @@ -7765,10 +7902,10 @@ def _consume_param_name(self) -> str | None: continue name_chars.append(self.advance()) if sc == '"': - in_double_sub = False + subscript_quote.double = False continue if sc == "'": - in_single = True + subscript_quote.single = True name_chars.append(self.advance()) continue if ( @@ -7778,11 +7915,11 @@ def _consume_param_name(self) -> str | None: ): # Locale string $"..." - strip the $ and enter double quote self.advance() # skip $ - in_double_sub = True + subscript_quote.double = True name_chars.append(self.advance()) # append " continue if sc == '"': - in_double_sub = True + subscript_quote.double = True name_chars.append(self.advance()) continue if sc == "\\": diff --git a/tools/transpiler/src/transpiler/check_style.py b/tools/transpiler/src/transpiler/check_style.py index 64b99ef6..263d66e0 100644 --- a/tools/transpiler/src/transpiler/check_style.py +++ b/tools/transpiler/src/transpiler/check_style.py @@ -24,6 +24,8 @@ async def async def f(): avoid async generator expression (x for x in ...) explicit loop global global x pass as parameter hasattr hasattr(x, 'y') explicit field check + import import x not allowed (self-contained) + import from from x import y not allowed (self-contained) loop else for x: ... else: use flag variable list comprehension [x*2 for x in items] explicit loop match/case match x: if/elif chain @@ -218,6 +220,14 @@ def check_file(filepath): if isinstance(node, ast.Try) and node.orelse: errors.append((lineno, "try else: move else code after try block")) + # import + if isinstance(node, ast.Import): + errors.append((lineno, "import: not allowed, code must be self-contained")) + + # from ... import + if isinstance(node, ast.ImportFrom): + errors.append((lineno, "from import: not allowed, code must be self-contained")) + return errors