From af320d3ce3644a2bbbaac084b096e7a67910921d Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Wed, 26 Apr 2023 15:35:36 -0600
Subject: [PATCH 1/8] gh-103656: Transfer f-string buffers to parser to avoid
 use-after-free

---
 Grammar/python.gram      |  11 ++-
 Lib/test/test_fstring.py |  14 ++++
 Parser/action_helpers.c  |  71 +++++++++++-----
 Parser/parser.c          |  24 +++---
 Parser/pegen.c           |  12 +++
 Parser/pegen.h           |  13 ++-
 Parser/tokenizer.c       | 176 +++++++++++++++++++++------------------
 Parser/tokenizer.h       |   1 +
 8 files changed, 199 insertions(+), 123 deletions(-)

diff --git a/Grammar/python.gram b/Grammar/python.gram
index 3a356c65a75195..6361dcd0985b99 100644
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@@ -881,14 +881,13 @@ fstring_middle[expr_ty]:
     | fstring_replacement_field
     | t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
 fstring_replacement_field[expr_ty]:
-    | '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] '}' {
-        _PyPegen_formatted_value(p, a, debug_expr, conversion, format, EXTRA)
-    }
+    | '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
+        _PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
     | invalid_replacement_field
-fstring_conversion[expr_ty]:
+fstring_conversion[ResultTokenWithMetadata*]:
     | conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) }
-fstring_full_format_spec[expr_ty]:
-    | ':' spec=fstring_format_spec* { spec ? _PyAST_JoinedStr((asdl_expr_seq*)spec, EXTRA) : NULL }
+fstring_full_format_spec[ResultTokenWithMetadata*]:
+    | colon=':' spec=fstring_format_spec* { _PyPegen_setup_full_format_spec(p, colon, (asdl_expr_seq *) spec, EXTRA) }
 fstring_format_spec[expr_ty]:
     | t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
     | fstring_replacement_field
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 9d5e16628f04b6..5e94c99ae65af1 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1535,5 +1535,19 @@ def test_not_closing_quotes(self):
         self.assertAllRaise(SyntaxError, "unterminated triple-quoted f-string literal",
                             ['f"""', "f'''"])
 
+    def test_syntax_error_after_debug(self):  # gh-103656: syntax errors in a field that follows a '{expr=}' debug field
+        self.assertAllRaise(SyntaxError, "f-string: expecting a valid expression after '{'",
+                            [
+                                "f'{1=}{;'",
+                                "f'{1=}{+;'",
+                                "f'{1=}{2}{;'",
+                                "f'{1=}{3}{;'",
+                            ])
+        self.assertAllRaise(SyntaxError, "f-string: expecting '=', or '!', or ':', or '}'",  # here the field has an expression, but ';' follows it
+                            [
+                                "f'{1=}{1;'",
+                                "f'{1=}{1;}'",
+                            ])
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index 55c0f6fdd620f4..168e01015fb761 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -965,17 +965,43 @@ _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
     return 0;
 }
 
-expr_ty
-_PyPegen_check_fstring_conversion(Parser *p, Token* symbol, expr_ty conv) {
-    if (symbol->lineno != conv->lineno || symbol->end_col_offset != conv->col_offset) {
+static ResultTokenWithMetadata *  /* Pair a parse result with a metadata object in one arena-allocated struct. */
+result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
+{
+    ResultTokenWithMetadata *res = _PyArena_Malloc(p->arena, sizeof(ResultTokenWithMetadata));  /* allocated from the parser's arena */
+    if (res == NULL) {
+        return NULL;  /* allocation failed; NULL is propagated to the caller */
+    }
+    res->metadata = metadata;  /* pointer is stored as-is; no reference count change is made here */
+    res->result = result;
+    return res;
+}
+
+ResultTokenWithMetadata *  /* Validate a "!conv" conversion and wrap conv with the "!" token's metadata. */
+_PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
+{
+    if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {  /* conv must start exactly where the "!" token ends */
         return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
-            symbol, conv,
+            conv_token, conv,
             "f-string: conversion type must come right after the exclamanation mark"
         );
     }
-    return conv;
+    return result_token_with_metadata(p, conv, conv_token->metadata);  /* NULL if the arena allocation fails */
 }
 
+ResultTokenWithMetadata *  /* Build the JoinedStr for a ":spec" and wrap it with the ":" token's metadata. */
+_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
+                                int end_lineno, int end_col_offset, PyArena *arena)
+{
+    if (!spec) {
+        return NULL;  /* no spec sequence: nothing to wrap */
+    }
+    expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, end_col_offset, p->arena);  /* uses p->arena; the arena parameter is not referenced in this body */
+    if (!res) {
+        return NULL;
+    }
+    return result_token_with_metadata(p, res, colon->metadata);
+}
 
 const char *
 _PyPegen_get_expr_name(expr_ty e)
@@ -1386,19 +1412,20 @@ expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
     return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
 }
 
-expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, expr_ty conversion,
-                                 expr_ty format, int lineno, int col_offset, int end_lineno, int end_col_offset,
-                                 PyArena *arena) {
+expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
+                                 ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
+                                 int end_lineno, int end_col_offset, PyArena *arena) {
     int conversion_val = -1;
     if (conversion != NULL) {
-        assert(conversion->kind == Name_kind);
-        Py_UCS4 first = PyUnicode_READ_CHAR(conversion->v.Name.id, 0);
+        expr_ty conversion_expr = (expr_ty) conversion->result;
+        assert(conversion_expr->kind == Name_kind);
+        Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0);
 
-        if (PyUnicode_GET_LENGTH(conversion->v.Name.id) > 1 ||
+        if (PyUnicode_GET_LENGTH(conversion_expr->v.Name.id) > 1 ||
             !(first == 's' || first == 'r' || first == 'a')) {
-            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion,
+            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion_expr,
                                               "f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
-                                              conversion->v.Name.id);
+                                              conversion_expr->v.Name.id);
             return NULL;
         }
 
@@ -1410,7 +1437,7 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ex
     }
 
     expr_ty formatted_value = _PyAST_FormattedValue(
-        expression, conversion_val, format,
+        expression, conversion_val, format ? (expr_ty) format->result : NULL,
         lineno, col_offset, end_lineno,
         end_col_offset, arena
     );
@@ -1418,22 +1445,26 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ex
     if (debug) {
         /* Find the non whitespace token after the "=" */
         int debug_end_line, debug_end_offset;
+        PyObject *debug_metadata;
 
         if (conversion) {
-            debug_end_line = conversion->lineno;
-            debug_end_offset = conversion->col_offset;
+            debug_end_line = ((expr_ty) conversion->result)->lineno;
+            debug_end_offset = ((expr_ty) conversion->result)->col_offset;
+            debug_metadata = conversion->metadata;
         }
         else if (format) {
-            debug_end_line = format->lineno;
-            debug_end_offset = format->col_offset + 1; // HACK: ??
+            debug_end_line = ((expr_ty) format->result)->lineno;
+            debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
+            debug_metadata = format->metadata;
         }
         else {
             debug_end_line = end_lineno;
             debug_end_offset = end_col_offset;
+            debug_metadata = closing_brace->metadata;
         }
 
-        expr_ty debug_text = decode_fstring_buffer(p, lineno, col_offset + 1,
-                                                   debug_end_line, debug_end_offset - 1);
+        expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
+                                             debug_end_offset - 1, p->arena);
         if (!debug_text) {
             return NULL;
         }
diff --git a/Parser/parser.c b/Parser/parser.c
index 771366844fc489..6eb985a7d3e123 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -738,8 +738,8 @@ static NameDefaultPair* lambda_param_maybe_default_rule(Parser *p);
 static arg_ty lambda_param_rule(Parser *p);
 static expr_ty fstring_middle_rule(Parser *p);
 static expr_ty fstring_replacement_field_rule(Parser *p);
-static expr_ty fstring_conversion_rule(Parser *p);
-static expr_ty fstring_full_format_spec_rule(Parser *p);
+static ResultTokenWithMetadata* fstring_conversion_rule(Parser *p);
+static ResultTokenWithMetadata* fstring_full_format_spec_rule(Parser *p);
 static expr_ty fstring_format_spec_rule(Parser *p);
 static expr_ty string_rule(Parser *p);
 static expr_ty strings_rule(Parser *p);
@@ -15639,11 +15639,11 @@ fstring_replacement_field_rule(Parser *p)
         }
         D(fprintf(stderr, "%*c> fstring_replacement_field[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'{' (yield_expr | star_expressions) \"=\"? fstring_conversion? fstring_full_format_spec? '}'"));
         Token * _literal;
-        Token * _literal_1;
         void *a;
         void *conversion;
         void *debug_expr;
         void *format;
+        Token * rbrace;
         if (
             (_literal = _PyPegen_expect_token(p, 25))  // token='{'
             &&
@@ -15655,7 +15655,7 @@ fstring_replacement_field_rule(Parser *p)
             &&
             (format = fstring_full_format_spec_rule(p), !p->error_indicator)  // fstring_full_format_spec?
             &&
-            (_literal_1 = _PyPegen_expect_token(p, 26))  // token='}'
+            (rbrace = _PyPegen_expect_token(p, 26))  // token='}'
         )
         {
             D(fprintf(stderr, "%*c+ fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' (yield_expr | star_expressions) \"=\"? fstring_conversion? fstring_full_format_spec? '}'"));
@@ -15668,7 +15668,7 @@ fstring_replacement_field_rule(Parser *p)
             UNUSED(_end_lineno); // Only used by EXTRA macro
             int _end_col_offset = _token->end_col_offset;
             UNUSED(_end_col_offset); // Only used by EXTRA macro
-            _res = _PyPegen_formatted_value ( p , a , debug_expr , conversion , format , EXTRA );
+            _res = _PyPegen_formatted_value ( p , a , debug_expr , conversion , format , rbrace , EXTRA );
             if (_res == NULL && PyErr_Occurred()) {
                 p->error_indicator = 1;
                 p->level--;
@@ -15706,7 +15706,7 @@ fstring_replacement_field_rule(Parser *p)
 }
 
 // fstring_conversion: "!" NAME
-static expr_ty
+static ResultTokenWithMetadata*
 fstring_conversion_rule(Parser *p)
 {
     if (p->level++ == MAXSTACK) {
@@ -15717,7 +15717,7 @@ fstring_conversion_rule(Parser *p)
         p->level--;
         return NULL;
     }
-    expr_ty _res = NULL;
+    ResultTokenWithMetadata* _res = NULL;
     int _mark = p->mark;
     { // "!" NAME
         if (p->error_indicator) {
@@ -15753,7 +15753,7 @@ fstring_conversion_rule(Parser *p)
 }
 
 // fstring_full_format_spec: ':' fstring_format_spec*
-static expr_ty
+static ResultTokenWithMetadata*
 fstring_full_format_spec_rule(Parser *p)
 {
     if (p->level++ == MAXSTACK) {
@@ -15764,7 +15764,7 @@ fstring_full_format_spec_rule(Parser *p)
         p->level--;
         return NULL;
     }
-    expr_ty _res = NULL;
+    ResultTokenWithMetadata* _res = NULL;
     int _mark = p->mark;
     if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
         p->error_indicator = 1;
@@ -15781,10 +15781,10 @@ fstring_full_format_spec_rule(Parser *p)
             return NULL;
         }
         D(fprintf(stderr, "%*c> fstring_full_format_spec[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':' fstring_format_spec*"));
-        Token * _literal;
+        Token * colon;
         asdl_seq * spec;
         if (
-            (_literal = _PyPegen_expect_token(p, 11))  // token=':'
+            (colon = _PyPegen_expect_token(p, 11))  // token=':'
             &&
             (spec = _loop0_112_rule(p))  // fstring_format_spec*
         )
@@ -15799,7 +15799,7 @@ fstring_full_format_spec_rule(Parser *p)
             UNUSED(_end_lineno); // Only used by EXTRA macro
             int _end_col_offset = _token->end_col_offset;
             UNUSED(_end_col_offset); // Only used by EXTRA macro
-            _res = spec ? _PyAST_JoinedStr ( ( asdl_expr_seq* ) spec , EXTRA ) : NULL;
+            _res = _PyPegen_setup_full_format_spec ( p , colon , ( asdl_expr_seq* ) spec , EXTRA );
             if (_res == NULL && PyErr_Occurred()) {
                 p->error_indicator = 1;
                 p->level--;
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 262bfabfba7a25..97f86e1372f7c0 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -155,6 +155,17 @@ initialize_token(Parser *p, Token *parser_token, struct token *new_token, int to
         return -1;
     }
 
+    if (new_token->metadata != NULL) {
+        parser_token->metadata = new_token->metadata;
+        if (_PyArena_AddPyObject(p->arena, parser_token->metadata) < 0) {
+            Py_DECREF(parser_token->metadata);
+            return -1;
+        }
+    }
+    else {
+        parser_token->metadata = NULL;
+    }
+
     parser_token->level = new_token->level;
     parser_token->lineno = new_token->lineno;
     parser_token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->col_offset
@@ -198,6 +209,7 @@ int
 _PyPegen_fill_token(Parser *p)
 {
     struct token new_token;
+    new_token.metadata = NULL;
     int type = _PyTokenizer_Get(p->tok, &new_token);
 
     // Record and skip '# type: ignore' comments
diff --git a/Parser/pegen.h b/Parser/pegen.h
index 6962013c2d18b4..8800e9f97f5e04 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -39,6 +39,7 @@ typedef struct {
     int level;
     int lineno, col_offset, end_lineno, end_col_offset;
     Memo *memo;
+    PyObject *metadata;
 } Token;
 
 typedef struct {
@@ -118,6 +119,11 @@ typedef struct {
     int is_keyword;
 } KeywordOrStarred;
 
+typedef struct {  /* A parse result bundled with the metadata object of a related token. */
+    void *result;  /* untyped result pointer; _PyPegen_formatted_value casts it to expr_ty */
+    PyObject *metadata;  /* taken from a Token's metadata field by the f-string action helpers */
+} ResultTokenWithMetadata;
+
 // Internal parser functions
 #if defined(Py_DEBUG)
 void _PyPegen_clear_memo_statistics(void);
@@ -310,7 +316,8 @@ StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
 arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
                                      asdl_arg_seq *, asdl_seq *, StarEtc *);
 arguments_ty _PyPegen_empty_arguments(Parser *);
-expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, expr_ty, expr_ty, int, int, int, int, PyArena *);
+expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, ResultTokenWithMetadata *, ResultTokenWithMetadata *, Token *,
+                                 int, int, int, int, PyArena *);
 AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
 stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
 stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
@@ -329,7 +336,9 @@ expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
 asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
 int _PyPegen_check_barry_as_flufl(Parser *, Token *);
 int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
-expr_ty _PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
+ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
+ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int,
+                                                         int, int, PyArena *);
 mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
 void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
 expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index a8649b8547e256..4fd5343c364ef1 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -31,6 +31,7 @@
 /* Don't ever change this -- it would break the portability of Python code */
 #define TABSIZE 8
 
+#define TOK_NEXTC() tok_nextc(tok, token)  /* shorthand; expects variables named tok and token in scope at the call site */
 #define MAKE_TOKEN(token_type) token_setup(tok, token, token_type, p_start, p_end)
 #define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
                 type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
@@ -58,7 +59,7 @@ static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
 
 /* Forward */
 static struct tok_state *tok_new(void);
-static int tok_nextc(struct tok_state *tok);
+static int tok_nextc(struct tok_state *tok, struct token *token);
 static void tok_backup(struct tok_state *tok, int c);
 static int syntaxerror(struct tok_state *tok, const char *format, ...);
 
@@ -391,7 +392,7 @@ restore_fstring_buffers(struct tok_state *tok)
 }
 
 static int
-update_fstring_expr(struct tok_state *tok, char cur)
+update_fstring_expr(struct tok_state *tok, struct token *token, char cur)
 {
     assert(tok->cur != NULL);
 
@@ -432,6 +433,15 @@ update_fstring_expr(struct tok_state *tok, char cur)
         case ':':
             if (tok_mode->last_expr_end == -1) {
                 tok_mode->last_expr_end = strlen(tok->start);
+                PyObject *res = PyUnicode_DecodeUTF8(
+                    tok_mode->last_expr_buffer,
+                    tok_mode->last_expr_size - tok_mode->last_expr_end,
+                    NULL
+                );
+                if (!res) {
+                    goto error;
+                }
+                token->metadata = res;
             }
             break;
         default:
@@ -959,7 +969,7 @@ _PyTokenizer_Free(struct tok_state *tok)
 }
 
 static int
-tok_readline_raw(struct tok_state *tok)
+tok_readline_raw(struct tok_state *tok, struct token *token)
 {
     do {
         if (!tok_reserve_buf(tok, BUFSIZ)) {
@@ -971,7 +981,7 @@ tok_readline_raw(struct tok_state *tok)
         if (line == NULL) {
             return 1;
         }
-        if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) {
+        if (tok->tok_mode_stack_index && !update_fstring_expr(tok, token, 0)) {
             return 0;
         }
         if (tok->fp_interactive &&
@@ -1009,7 +1019,7 @@ tok_underflow_string(struct tok_state *tok) {
 }
 
 static int
-tok_underflow_interactive(struct tok_state *tok) {
+tok_underflow_interactive(struct tok_state *tok, struct token *token) {
     if (tok->interactive_underflow == IUNDERFLOW_STOP) {
         tok->done = E_INTERACT_STOP;
         return 1;
@@ -1094,14 +1104,14 @@ tok_underflow_interactive(struct tok_state *tok) {
         return 0;
     }
 
-    if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) {
+    if (tok->tok_mode_stack_index && !update_fstring_expr(tok, token, 0)) {
         return 0;
     }
     return 1;
 }
 
 static int
-tok_underflow_file(struct tok_state *tok) {
+tok_underflow_file(struct tok_state *tok, struct token *token) {
     if (tok->start == NULL) {
         tok->cur = tok->inp = tok->buf;
     }
@@ -1124,7 +1134,7 @@ tok_underflow_file(struct tok_state *tok) {
     }
     else {
         /* We want a 'raw' read. */
-        if (!tok_readline_raw(tok)) {
+        if (!tok_readline_raw(tok, token)) {
             return 0;
         }
     }
@@ -1192,7 +1202,7 @@ print_escape(FILE *f, const char *s, Py_ssize_t size)
 /* Get next char, updating state; error code goes into tok->done */
 
 static int
-tok_nextc(struct tok_state *tok)
+tok_nextc(struct tok_state *tok, struct token *token)
 {
     int rc;
     for (;;) {
@@ -1207,10 +1217,10 @@ tok_nextc(struct tok_state *tok)
             rc = tok_underflow_string(tok);
         }
         else if (tok->prompt != NULL) {
-            rc = tok_underflow_interactive(tok);
+            rc = tok_underflow_interactive(tok, token);
         }
         else {
-            rc = tok_underflow_file(tok);
+            rc = tok_underflow_file(tok, token);
         }
 #if defined(Py_DEBUG)
         if (tok->debug) {
@@ -1399,12 +1409,12 @@ warn_invalid_escape_sequence(struct tok_state *tok, int first_invalid_escape_cha
 }
 
 static int
-lookahead(struct tok_state *tok, const char *test)
+lookahead(struct tok_state *tok, struct token *token, const char *test)
 {
     const char *s = test;
     int res = 0;
     while (1) {
-        int c = tok_nextc(tok);
+        int c = TOK_NEXTC();
         if (*s == 0) {
             res = !is_potential_identifier_char(c);
         }
@@ -1422,7 +1432,7 @@ lookahead(struct tok_state *tok, const char *test)
 }
 
 static int
-verify_end_of_number(struct tok_state *tok, int c, const char *kind)
+verify_end_of_number(struct tok_state *tok, struct token *token, int c, const char *kind)
 {
     /* Emit a deprecation warning only if the numeric literal is immediately
      * followed by one of keywords which can occur after a numeric literal
@@ -1436,26 +1446,26 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind)
      */
     int r = 0;
     if (c == 'a') {
-        r = lookahead(tok, "nd");
+        r = lookahead(tok, token, "nd");
     }
     else if (c == 'e') {
-        r = lookahead(tok, "lse");
+        r = lookahead(tok, token, "lse");
     }
     else if (c == 'f') {
-        r = lookahead(tok, "or");
+        r = lookahead(tok, token, "or");
     }
     else if (c == 'i') {
-        int c2 = tok_nextc(tok);
+        int c2 = TOK_NEXTC();
         if (c2 == 'f' || c2 == 'n' || c2 == 's') {
             r = 1;
         }
         tok_backup(tok, c2);
     }
     else if (c == 'o') {
-        r = lookahead(tok, "r");
+        r = lookahead(tok, token, "r");
     }
     else if (c == 'n') {
-        r = lookahead(tok, "ot");
+        r = lookahead(tok, token, "ot");
     }
     if (r) {
         tok_backup(tok, c);
@@ -1464,7 +1474,7 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind)
         {
             return 0;
         }
-        tok_nextc(tok);
+        TOK_NEXTC();
     }
     else /* In future releases, only error will remain. */
     if (is_potential_identifier_char(c)) {
@@ -1532,18 +1542,18 @@ verify_identifier(struct tok_state *tok)
 }
 
 static int
-tok_decimal_tail(struct tok_state *tok)
+tok_decimal_tail(struct tok_state *tok, struct token *token)
 {
     int c;
 
     while (1) {
         do {
-            c = tok_nextc(tok);
+            c = TOK_NEXTC();
         } while (isdigit(c));
         if (c != '_') {
             break;
         }
-        c = tok_nextc(tok);
+        c = TOK_NEXTC();
         if (!isdigit(c)) {
             tok_backup(tok, c);
             syntaxerror(tok, "invalid decimal literal");
@@ -1555,13 +1565,13 @@ tok_decimal_tail(struct tok_state *tok)
 
 
 static inline int
-tok_continuation_line(struct tok_state *tok) {
-    int c = tok_nextc(tok);
+tok_continuation_line(struct tok_state *tok, struct token *token) {
+    int c = TOK_NEXTC();
     if (c != '\n') {
         tok->done = E_LINECONT;
         return -1;
     }
-    c = tok_nextc(tok);
+    c = TOK_NEXTC();
     if (c == EOF) {
         tok->done = E_EOF;
         tok->cur = tok->inp;
@@ -1628,7 +1638,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         tok->atbol = 0;
         int cont_line_col = 0;
         for (;;) {
-            c = tok_nextc(tok);
+            c = TOK_NEXTC();
             if (c == ' ') {
                 col++, altcol++;
             }
@@ -1645,7 +1655,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                 // preceded by whitespace, **the first one we find** determines
                 // the level of indentation of whatever comes next.
                 cont_line_col = cont_line_col ? cont_line_col : col;
-                if ((c = tok_continuation_line(tok)) == -1) {
+                if ((c = tok_continuation_line(tok, token)) == -1) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
@@ -1733,7 +1743,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
     }
 
     /* Peek ahead at the next character */
-    c = tok_nextc(tok);
+    c = TOK_NEXTC();
     tok_backup(tok, c);
     /* Check if we are closing an async function */
     if (tok->async_def
@@ -1761,7 +1771,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
     tok->start = NULL;
     /* Skip spaces */
     do {
-        c = tok_nextc(tok);
+        c = TOK_NEXTC();
     } while (c == ' ' || c == '\t' || c == '\014');
 
     /* Set start of current token */
@@ -1779,7 +1789,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         int current_starting_col_offset;
 
         while (c != EOF && c != '\n') {
-            c = tok_nextc(tok);
+            c = TOK_NEXTC();
         }
 
         if (tok->type_comments) {
@@ -1825,7 +1835,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
 
                     /* If this type ignore is the only thing on the line, consume the newline also. */
                     if (blankline) {
-                        tok_nextc(tok);
+                        TOK_NEXTC();
                         tok->atbol = 1;
                     }
                     return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
@@ -1874,7 +1884,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
             else {
                 break;
             }
-            c = tok_nextc(tok);
+            c = TOK_NEXTC();
             if (c == '"' || c == '\'') {
                 if (saw_f) {
                     goto f_string_quote;
@@ -1886,7 +1896,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
             if (c >= 128) {
                 nonascii = 1;
             }
-            c = tok_nextc(tok);
+            c = TOK_NEXTC();
         }
         tok_backup(tok, c);
         if (nonascii && !verify_identifier(tok)) {
@@ -1963,11 +1973,11 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
 
     /* Period or number starting with period? */
     if (c == '.') {
-        c = tok_nextc(tok);
+        c = TOK_NEXTC();
         if (isdigit(c)) {
             goto fraction;
         } else if (c == '.') {
-            c = tok_nextc(tok);
+            c = TOK_NEXTC();
             if (c == '.') {
                 p_start = tok->start;
                 p_end = tok->cur;
@@ -1990,32 +2000,32 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
     if (isdigit(c)) {
         if (c == '0') {
             /* Hex, octal or binary -- maybe. */
-            c = tok_nextc(tok);
+            c = TOK_NEXTC();
             if (c == 'x' || c == 'X') {
                 /* Hex */
-                c = tok_nextc(tok);
+                c = TOK_NEXTC();
                 do {
                     if (c == '_') {
-                        c = tok_nextc(tok);
+                        c = TOK_NEXTC();
                     }
                     if (!isxdigit(c)) {
                         tok_backup(tok, c);
                         return MAKE_TOKEN(syntaxerror(tok, "invalid hexadecimal literal"));
                     }
                     do {
-                        c = tok_nextc(tok);
+                        c = TOK_NEXTC();
                     } while (isxdigit(c));
                 } while (c == '_');
-                if (!verify_end_of_number(tok, c, "hexadecimal")) {
+                if (!verify_end_of_number(tok, token, c, "hexadecimal")) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
             else if (c == 'o' || c == 'O') {
                 /* Octal */
-                c = tok_nextc(tok);
+                c = TOK_NEXTC();
                 do {
                     if (c == '_') {
-                        c = tok_nextc(tok);
+                        c = TOK_NEXTC();
                     }
                     if (c < '0' || c >= '8') {
                         if (isdigit(c)) {
@@ -2028,23 +2038,23 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         }
                     }
                     do {
-                        c = tok_nextc(tok);
+                        c = TOK_NEXTC();
                     } while ('0' <= c && c < '8');
                 } while (c == '_');
                 if (isdigit(c)) {
                     return MAKE_TOKEN(syntaxerror(tok,
                             "invalid digit '%c' in octal literal", c));
                 }
-                if (!verify_end_of_number(tok, c, "octal")) {
+                if (!verify_end_of_number(tok, token, c, "octal")) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
             else if (c == 'b' || c == 'B') {
                 /* Binary */
-                c = tok_nextc(tok);
+                c = TOK_NEXTC();
                 do {
                     if (c == '_') {
-                        c = tok_nextc(tok);
+                        c = TOK_NEXTC();
                     }
                     if (c != '0' && c != '1') {
                         if (isdigit(c)) {
@@ -2056,13 +2066,13 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         }
                     }
                     do {
-                        c = tok_nextc(tok);
+                        c = TOK_NEXTC();
                     } while (c == '0' || c == '1');
                 } while (c == '_');
                 if (isdigit(c)) {
                     return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in binary literal", c));
                 }
-                if (!verify_end_of_number(tok, c, "binary")) {
+                if (!verify_end_of_number(tok, token, c, "binary")) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
@@ -2072,7 +2082,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                 /* in any case, allow '0' as a literal */
                 while (1) {
                     if (c == '_') {
-                        c = tok_nextc(tok);
+                        c = TOK_NEXTC();
                         if (!isdigit(c)) {
                             tok_backup(tok, c);
                             return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal"));
@@ -2081,18 +2091,18 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                     if (c != '0') {
                         break;
                     }
-                    c = tok_nextc(tok);
+                    c = TOK_NEXTC();
                 }
                 char* zeros_end = tok->cur;
                 if (isdigit(c)) {
                     nonzero = 1;
-                    c = tok_decimal_tail(tok);
+                    c = tok_decimal_tail(tok, token);
                     if (c == 0) {
                         return MAKE_TOKEN(ERRORTOKEN);
                     }
                 }
                 if (c == '.') {
-                    c = tok_nextc(tok);
+                    c = TOK_NEXTC();
                     goto fraction;
                 }
                 else if (c == 'e' || c == 'E') {
@@ -2111,25 +2121,25 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                             "literals are not permitted; "
                             "use an 0o prefix for octal integers"));
                 }
-                if (!verify_end_of_number(tok, c, "decimal")) {
+                if (!verify_end_of_number(tok, token, c, "decimal")) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
         }
         else {
             /* Decimal */
-            c = tok_decimal_tail(tok);
+            c = tok_decimal_tail(tok, token);
             if (c == 0) {
                 return MAKE_TOKEN(ERRORTOKEN);
             }
             {
                 /* Accept floating point numbers. */
                 if (c == '.') {
-                    c = tok_nextc(tok);
+                    c = TOK_NEXTC();
         fraction:
                     /* Fraction */
                     if (isdigit(c)) {
-                        c = tok_decimal_tail(tok);
+                        c = tok_decimal_tail(tok, token);
                         if (c == 0) {
                             return MAKE_TOKEN(ERRORTOKEN);
                         }
@@ -2140,16 +2150,16 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                   exponent:
                     e = c;
                     /* Exponent part */
-                    c = tok_nextc(tok);
+                    c = TOK_NEXTC();
                     if (c == '+' || c == '-') {
-                        c = tok_nextc(tok);
+                        c = TOK_NEXTC();
                         if (!isdigit(c)) {
                             tok_backup(tok, c);
                             return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal"));
                         }
                     } else if (!isdigit(c)) {
                         tok_backup(tok, c);
-                        if (!verify_end_of_number(tok, e, "decimal")) {
+                        if (!verify_end_of_number(tok, token, e, "decimal")) {
                             return MAKE_TOKEN(ERRORTOKEN);
                         }
                         tok_backup(tok, e);
@@ -2157,7 +2167,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         p_end = tok->cur;
                         return MAKE_TOKEN(NUMBER);
                     }
-                    c = tok_decimal_tail(tok);
+                    c = tok_decimal_tail(tok, token);
                     if (c == 0) {
                         return MAKE_TOKEN(ERRORTOKEN);
                     }
@@ -2165,12 +2175,12 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                 if (c == 'j' || c == 'J') {
                     /* Imaginary part */
         imaginary:
-                    c = tok_nextc(tok);
-                    if (!verify_end_of_number(tok, c, "imaginary")) {
+                    c = TOK_NEXTC();
+                    if (!verify_end_of_number(tok, token, c, "imaginary")) {
                         return MAKE_TOKEN(ERRORTOKEN);
                     }
                 }
-                else if (!verify_end_of_number(tok, c, "decimal")) {
+                else if (!verify_end_of_number(tok, token, c, "decimal")) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
@@ -2194,9 +2204,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         tok->multi_line_start = tok->line_start;
 
         /* Find the quote size and start of string */
-        int after_quote = tok_nextc(tok);
+        int after_quote = TOK_NEXTC();
         if (after_quote == quote) {
-            int after_after_quote = tok_nextc(tok);
+            int after_after_quote = TOK_NEXTC();
             if (after_after_quote == quote) {
                 quote_size = 3;
             }
@@ -2258,9 +2268,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         tok->multi_line_start = tok->line_start;
 
         /* Find the quote size and start of string */
-        c = tok_nextc(tok);
+        c = TOK_NEXTC();
         if (c == quote) {
-            c = tok_nextc(tok);
+            c = TOK_NEXTC();
             if (c == quote) {
                 quote_size = 3;
             }
@@ -2274,7 +2284,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
 
         /* Get rest of string */
         while (end_quote_size != quote_size) {
-            c = tok_nextc(tok);
+            c = TOK_NEXTC();
             if (tok->done == E_DECODE)
                 break;
             if (c == EOF || (quote_size == 1 && c == '\n')) {
@@ -2324,7 +2334,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
             else {
                 end_quote_size = 0;
                 if (c == '\\') {
-                    tok_nextc(tok);  /* skip escaped char */
+                    TOK_NEXTC();  /* skip escaped char */
                 }
             }
         }
@@ -2336,7 +2346,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
 
     /* Line continuation */
     if (c == '\\') {
-        if ((c = tok_continuation_line(tok)) == -1) {
+        if ((c = tok_continuation_line(tok, token)) == -1) {
             return MAKE_TOKEN(ERRORTOKEN);
         }
         tok->cont_line = 1;
@@ -2351,7 +2361,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
          * to adjust it manually */
         int cursor = current_tok->curly_bracket_depth - (c != '{');
 
-        if (cursor == 0 && !update_fstring_expr(tok, c)) {
+        if (cursor == 0 && !update_fstring_expr(tok, token, c)) {
             return MAKE_TOKEN(ENDMARKER);
         }
 
@@ -2365,10 +2375,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
 
     /* Check for two-character token */
     {
-        int c2 = tok_nextc(tok);
+        int c2 = TOK_NEXTC();
         int current_token = _PyToken_TwoChars(c, c2);
         if (current_token != OP) {
-            int c3 = tok_nextc(tok);
+            int c3 = TOK_NEXTC();
             int current_token3 = _PyToken_ThreeChars(c, c2, c3);
             if (current_token3 != OP) {
                 current_token = current_token3;
@@ -2478,9 +2488,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
 
     // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
     // before it.
-    int start_char = tok_nextc(tok);
+    int start_char = TOK_NEXTC();
     if (start_char == '{') {
-        int peek1 = tok_nextc(tok);
+        int peek1 = TOK_NEXTC();
         tok_backup(tok, peek1);
         tok_backup(tok, start_char);
         if (peek1 != '{') {
@@ -2498,7 +2508,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
 
     // Check if we are at the end of the string
     for (int i = 0; i < current_tok->f_string_quote_size; i++) {
-        int quote = tok_nextc(tok);
+        int quote = TOK_NEXTC();
         if (quote != current_tok->f_string_quote) {
             tok_backup(tok, quote);
             goto f_string_middle;
@@ -2520,7 +2530,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
 f_string_middle:
 
     while (end_quote_size != current_tok->f_string_quote_size) {
-        int c = tok_nextc(tok);
+        int c = TOK_NEXTC();
         if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
             assert(tok->multi_line_start != NULL);
             // shift the tok_state's location into
@@ -2557,7 +2567,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
                 INSIDE_FSTRING_EXPR(current_tok)
         );
         if (c == '{') {
-            int peek = tok_nextc(tok);
+            int peek = TOK_NEXTC();
             if (peek != '{' || in_format_spec) {
                 tok_backup(tok, peek);
                 tok_backup(tok, c);
@@ -2579,7 +2589,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
                 p_end = tok->cur;
                 return MAKE_TOKEN(FSTRING_MIDDLE);
             }
-            int peek = tok_nextc(tok);
+            int peek = TOK_NEXTC();
 
             // The tokenizer can only be in the format spec if we have already completed the expression
             // scanning (indicated by the end of the expression being set) and we are not at the top level
@@ -2597,7 +2607,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
             }
             return MAKE_TOKEN(FSTRING_MIDDLE);
         } else if (c == '\\') {
-            int peek = tok_nextc(tok);
+            int peek = TOK_NEXTC();
             // Special case when the backslash is right before a curly
             // brace. We have to restore and return the control back
             // to the loop for the next iteration.
@@ -2614,7 +2624,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
             if (!current_tok->f_string_raw) {
                 if (peek == 'N') {
                     /* Handle named unicode escapes (\N{BULLET}) */
-                    peek = tok_nextc(tok);
+                    peek = TOK_NEXTC();
                     if (peek == '{') {
                         unicode_escape = 1;
                     } else {
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 2b94aecce626c3..97b54dc2a625ef 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -31,6 +31,7 @@ struct token {
     int level;
     int lineno, col_offset, end_lineno, end_col_offset;
     const char *start, *end;
+    PyObject *metadata;
 };
 
 enum tokenizer_mode_kind_t {

From 51d7d83af3b2331e1b6e0f1262527edc8e177fe5 Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Thu, 27 Apr 2023 00:18:02 +0100
Subject: [PATCH 2/8] fixup! gh-103656: Transfer f-string buffers to parser to
 avoid use-after-free

---
 Parser/action_helpers.c |  21 -----
 Parser/tokenizer.c      | 198 +++++++++++++++++++++-------------------
 2 files changed, 104 insertions(+), 115 deletions(-)

diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index 168e01015fb761..0aaaed64c4037c 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -1223,27 +1223,6 @@ _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq
 
 // Fstring stuff
 
-static expr_ty
-decode_fstring_buffer(Parser *p, int lineno, int col_offset, int end_lineno,
-                      int end_col_offset)
-{
-    tokenizer_mode *tok_mode = &(p->tok->tok_mode_stack[p->tok->tok_mode_stack_index]);
-    assert(tok_mode->last_expr_buffer != NULL);
-    assert(tok_mode->last_expr_size >= 0 && tok_mode->last_expr_end >= 0);
-
-    PyObject *res = PyUnicode_DecodeUTF8(
-        tok_mode->last_expr_buffer,
-        tok_mode->last_expr_size - tok_mode->last_expr_end,
-        NULL
-    );
-    if (!res || _PyArena_AddPyObject(p->arena, res) < 0) {
-        Py_XDECREF(res);
-        return NULL;
-    }
-
-    return _PyAST_Constant(res, NULL, lineno, col_offset, end_lineno, end_col_offset, p->arena);
-}
-
 static expr_ty
 _PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant) {
     assert(PyUnicode_CheckExact(constant->v.Constant.value));
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 4fd5343c364ef1..045136d4c6af62 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -31,7 +31,6 @@
 /* Don't ever change this -- it would break the portability of Python code */
 #define TABSIZE 8
 
-#define TOK_NEXTC() tok_nextc(tok, token)
 #define MAKE_TOKEN(token_type) token_setup(tok, token, token_type, p_start, p_end)
 #define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
                 type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
@@ -59,7 +58,7 @@ static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
 
 /* Forward */
 static struct tok_state *tok_new(void);
-static int tok_nextc(struct tok_state *tok, struct token *token);
+static int tok_nextc(struct tok_state *tok);
 static void tok_backup(struct tok_state *tok, int c);
 static int syntaxerror(struct tok_state *tok, const char *format, ...);
 
@@ -392,7 +391,25 @@ restore_fstring_buffers(struct tok_state *tok)
 }
 
 static int
-update_fstring_expr(struct tok_state *tok, struct token *token, char cur)
+set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
+    assert(token != NULL);
+    assert(c == '{' || c == ':' || c == '!');
+
+    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
+    PyObject *res = PyUnicode_DecodeUTF8(
+        tok_mode->last_expr_buffer,
+        tok_mode->last_expr_size - tok_mode->last_expr_end,
+        NULL
+    );
+    if (!res) {
+        return -1;
+    }
+    token->metadata = res;
+    return 0;
+}
+
+static int
+update_fstring_expr(struct tok_state *tok, char cur)
 {
     assert(tok->cur != NULL);
 
@@ -433,15 +450,6 @@ update_fstring_expr(struct tok_state *tok, struct token *token, char cur)
         case ':':
             if (tok_mode->last_expr_end == -1) {
                 tok_mode->last_expr_end = strlen(tok->start);
-                PyObject *res = PyUnicode_DecodeUTF8(
-                    tok_mode->last_expr_buffer,
-                    tok_mode->last_expr_size - tok_mode->last_expr_end,
-                    NULL
-                );
-                if (!res) {
-                    goto error;
-                }
-                token->metadata = res;
             }
             break;
         default:
@@ -969,7 +977,7 @@ _PyTokenizer_Free(struct tok_state *tok)
 }
 
 static int
-tok_readline_raw(struct tok_state *tok, struct token *token)
+tok_readline_raw(struct tok_state *tok)
 {
     do {
         if (!tok_reserve_buf(tok, BUFSIZ)) {
@@ -981,7 +989,7 @@ tok_readline_raw(struct tok_state *tok, struct token *token)
         if (line == NULL) {
             return 1;
         }
-        if (tok->tok_mode_stack_index && !update_fstring_expr(tok, token, 0)) {
+        if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) {
             return 0;
         }
         if (tok->fp_interactive &&
@@ -1019,7 +1027,7 @@ tok_underflow_string(struct tok_state *tok) {
 }
 
 static int
-tok_underflow_interactive(struct tok_state *tok, struct token *token) {
+tok_underflow_interactive(struct tok_state *tok) {
     if (tok->interactive_underflow == IUNDERFLOW_STOP) {
         tok->done = E_INTERACT_STOP;
         return 1;
@@ -1104,14 +1112,14 @@ tok_underflow_interactive(struct tok_state *tok, struct token *token) {
         return 0;
     }
 
-    if (tok->tok_mode_stack_index && !update_fstring_expr(tok, token, 0)) {
+    if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) {
         return 0;
     }
     return 1;
 }
 
 static int
-tok_underflow_file(struct tok_state *tok, struct token *token) {
+tok_underflow_file(struct tok_state *tok) {
     if (tok->start == NULL) {
         tok->cur = tok->inp = tok->buf;
     }
@@ -1134,7 +1142,7 @@ tok_underflow_file(struct tok_state *tok, struct token *token) {
     }
     else {
         /* We want a 'raw' read. */
-        if (!tok_readline_raw(tok, token)) {
+        if (!tok_readline_raw(tok)) {
             return 0;
         }
     }
@@ -1202,7 +1210,7 @@ print_escape(FILE *f, const char *s, Py_ssize_t size)
 /* Get next char, updating state; error code goes into tok->done */
 
 static int
-tok_nextc(struct tok_state *tok, struct token *token)
+tok_nextc(struct tok_state *tok)
 {
     int rc;
     for (;;) {
@@ -1217,10 +1225,10 @@ tok_nextc(struct tok_state *tok, struct token *token)
             rc = tok_underflow_string(tok);
         }
         else if (tok->prompt != NULL) {
-            rc = tok_underflow_interactive(tok, token);
+            rc = tok_underflow_interactive(tok);
         }
         else {
-            rc = tok_underflow_file(tok, token);
+            rc = tok_underflow_file(tok);
         }
 #if defined(Py_DEBUG)
         if (tok->debug) {
@@ -1409,12 +1417,12 @@ warn_invalid_escape_sequence(struct tok_state *tok, int first_invalid_escape_cha
 }
 
 static int
-lookahead(struct tok_state *tok, struct token *token, const char *test)
+lookahead(struct tok_state *tok, const char *test)
 {
     const char *s = test;
     int res = 0;
     while (1) {
-        int c = TOK_NEXTC();
+        int c = tok_nextc(tok);
         if (*s == 0) {
             res = !is_potential_identifier_char(c);
         }
@@ -1432,7 +1440,7 @@ lookahead(struct tok_state *tok, struct token *token, const char *test)
 }
 
 static int
-verify_end_of_number(struct tok_state *tok, struct token *token, int c, const char *kind)
+verify_end_of_number(struct tok_state *tok, int c, const char *kind)
 {
     /* Emit a deprecation warning only if the numeric literal is immediately
      * followed by one of keywords which can occur after a numeric literal
@@ -1446,26 +1454,26 @@ verify_end_of_number(struct tok_state *tok, struct token *token, int c, const ch
      */
     int r = 0;
     if (c == 'a') {
-        r = lookahead(tok, token, "nd");
+        r = lookahead(tok, "nd");
     }
     else if (c == 'e') {
-        r = lookahead(tok, token, "lse");
+        r = lookahead(tok, "lse");
     }
     else if (c == 'f') {
-        r = lookahead(tok, token, "or");
+        r = lookahead(tok, "or");
     }
     else if (c == 'i') {
-        int c2 = TOK_NEXTC();
+        int c2 = tok_nextc(tok);
         if (c2 == 'f' || c2 == 'n' || c2 == 's') {
             r = 1;
         }
         tok_backup(tok, c2);
     }
     else if (c == 'o') {
-        r = lookahead(tok, token, "r");
+        r = lookahead(tok, "r");
     }
     else if (c == 'n') {
-        r = lookahead(tok, token, "ot");
+        r = lookahead(tok, "ot");
     }
     if (r) {
         tok_backup(tok, c);
@@ -1474,7 +1482,7 @@ verify_end_of_number(struct tok_state *tok, struct token *token, int c, const ch
         {
             return 0;
         }
-        TOK_NEXTC();
+        tok_nextc(tok);
     }
     else /* In future releases, only error will remain. */
     if (is_potential_identifier_char(c)) {
@@ -1542,18 +1550,18 @@ verify_identifier(struct tok_state *tok)
 }
 
 static int
-tok_decimal_tail(struct tok_state *tok, struct token *token)
+tok_decimal_tail(struct tok_state *tok)
 {
     int c;
 
     while (1) {
         do {
-            c = TOK_NEXTC();
+            c = tok_nextc(tok);
         } while (isdigit(c));
         if (c != '_') {
             break;
         }
-        c = TOK_NEXTC();
+        c = tok_nextc(tok);
         if (!isdigit(c)) {
             tok_backup(tok, c);
             syntaxerror(tok, "invalid decimal literal");
@@ -1565,13 +1573,13 @@ tok_decimal_tail(struct tok_state *tok, struct token *token)
 
 
 static inline int
-tok_continuation_line(struct tok_state *tok, struct token *token) {
-    int c = TOK_NEXTC();
+tok_continuation_line(struct tok_state *tok) {
+    int c = tok_nextc(tok);
     if (c != '\n') {
         tok->done = E_LINECONT;
         return -1;
     }
-    c = TOK_NEXTC();
+    c = tok_nextc(tok);
     if (c == EOF) {
         tok->done = E_EOF;
         tok->cur = tok->inp;
@@ -1638,7 +1646,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         tok->atbol = 0;
         int cont_line_col = 0;
         for (;;) {
-            c = TOK_NEXTC();
+            c = tok_nextc(tok);
             if (c == ' ') {
                 col++, altcol++;
             }
@@ -1655,7 +1663,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                 // preceded by whitespace, **the first one we find** determines
                 // the level of indentation of whatever comes next.
                 cont_line_col = cont_line_col ? cont_line_col : col;
-                if ((c = tok_continuation_line(tok, token)) == -1) {
+                if ((c = tok_continuation_line(tok)) == -1) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
@@ -1743,7 +1751,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
     }
 
     /* Peek ahead at the next character */
-    c = TOK_NEXTC();
+    c = tok_nextc(tok);
     tok_backup(tok, c);
     /* Check if we are closing an async function */
     if (tok->async_def
@@ -1771,7 +1779,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
     tok->start = NULL;
     /* Skip spaces */
     do {
-        c = TOK_NEXTC();
+        c = tok_nextc(tok);
     } while (c == ' ' || c == '\t' || c == '\014');
 
     /* Set start of current token */
@@ -1789,7 +1797,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         int current_starting_col_offset;
 
         while (c != EOF && c != '\n') {
-            c = TOK_NEXTC();
+            c = tok_nextc(tok);
         }
 
         if (tok->type_comments) {
@@ -1835,7 +1843,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
 
                     /* If this type ignore is the only thing on the line, consume the newline also. */
                     if (blankline) {
-                        TOK_NEXTC();
+                        tok_nextc(tok);
                         tok->atbol = 1;
                     }
                     return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
@@ -1884,7 +1892,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
             else {
                 break;
             }
-            c = TOK_NEXTC();
+            c = tok_nextc(tok);
             if (c == '"' || c == '\'') {
                 if (saw_f) {
                     goto f_string_quote;
@@ -1896,7 +1904,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
             if (c >= 128) {
                 nonascii = 1;
             }
-            c = TOK_NEXTC();
+            c = tok_nextc(tok);
         }
         tok_backup(tok, c);
         if (nonascii && !verify_identifier(tok)) {
@@ -1973,11 +1981,11 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
 
     /* Period or number starting with period? */
     if (c == '.') {
-        c = TOK_NEXTC();
+        c = tok_nextc(tok);
         if (isdigit(c)) {
             goto fraction;
         } else if (c == '.') {
-            c = TOK_NEXTC();
+            c = tok_nextc(tok);
             if (c == '.') {
                 p_start = tok->start;
                 p_end = tok->cur;
@@ -2000,32 +2008,32 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
     if (isdigit(c)) {
         if (c == '0') {
             /* Hex, octal or binary -- maybe. */
-            c = TOK_NEXTC();
+            c = tok_nextc(tok);
             if (c == 'x' || c == 'X') {
                 /* Hex */
-                c = TOK_NEXTC();
+                c = tok_nextc(tok);
                 do {
                     if (c == '_') {
-                        c = TOK_NEXTC();
+                        c = tok_nextc(tok);
                     }
                     if (!isxdigit(c)) {
                         tok_backup(tok, c);
                         return MAKE_TOKEN(syntaxerror(tok, "invalid hexadecimal literal"));
                     }
                     do {
-                        c = TOK_NEXTC();
+                        c = tok_nextc(tok);
                     } while (isxdigit(c));
                 } while (c == '_');
-                if (!verify_end_of_number(tok, token, c, "hexadecimal")) {
+                if (!verify_end_of_number(tok, c, "hexadecimal")) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
             else if (c == 'o' || c == 'O') {
                 /* Octal */
-                c = TOK_NEXTC();
+                c = tok_nextc(tok);
                 do {
                     if (c == '_') {
-                        c = TOK_NEXTC();
+                        c = tok_nextc(tok);
                     }
                     if (c < '0' || c >= '8') {
                         if (isdigit(c)) {
@@ -2038,23 +2046,23 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         }
                     }
                     do {
-                        c = TOK_NEXTC();
+                        c = tok_nextc(tok);
                     } while ('0' <= c && c < '8');
                 } while (c == '_');
                 if (isdigit(c)) {
                     return MAKE_TOKEN(syntaxerror(tok,
                             "invalid digit '%c' in octal literal", c));
                 }
-                if (!verify_end_of_number(tok, token, c, "octal")) {
+                if (!verify_end_of_number(tok, c, "octal")) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
             else if (c == 'b' || c == 'B') {
                 /* Binary */
-                c = TOK_NEXTC();
+                c = tok_nextc(tok);
                 do {
                     if (c == '_') {
-                        c = TOK_NEXTC();
+                        c = tok_nextc(tok);
                     }
                     if (c != '0' && c != '1') {
                         if (isdigit(c)) {
@@ -2066,13 +2074,13 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         }
                     }
                     do {
-                        c = TOK_NEXTC();
+                        c = tok_nextc(tok);
                     } while (c == '0' || c == '1');
                 } while (c == '_');
                 if (isdigit(c)) {
                     return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in binary literal", c));
                 }
-                if (!verify_end_of_number(tok, token, c, "binary")) {
+                if (!verify_end_of_number(tok, c, "binary")) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
@@ -2082,7 +2090,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                 /* in any case, allow '0' as a literal */
                 while (1) {
                     if (c == '_') {
-                        c = TOK_NEXTC();
+                        c = tok_nextc(tok);
                         if (!isdigit(c)) {
                             tok_backup(tok, c);
                             return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal"));
@@ -2091,18 +2099,18 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                     if (c != '0') {
                         break;
                     }
-                    c = TOK_NEXTC();
+                    c = tok_nextc(tok);
                 }
                 char* zeros_end = tok->cur;
                 if (isdigit(c)) {
                     nonzero = 1;
-                    c = tok_decimal_tail(tok, token);
+                    c = tok_decimal_tail(tok);
                     if (c == 0) {
                         return MAKE_TOKEN(ERRORTOKEN);
                     }
                 }
                 if (c == '.') {
-                    c = TOK_NEXTC();
+                    c = tok_nextc(tok);
                     goto fraction;
                 }
                 else if (c == 'e' || c == 'E') {
@@ -2121,25 +2129,25 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                             "literals are not permitted; "
                             "use an 0o prefix for octal integers"));
                 }
-                if (!verify_end_of_number(tok, token, c, "decimal")) {
+                if (!verify_end_of_number(tok, c, "decimal")) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
         }
         else {
             /* Decimal */
-            c = tok_decimal_tail(tok, token);
+            c = tok_decimal_tail(tok);
             if (c == 0) {
                 return MAKE_TOKEN(ERRORTOKEN);
             }
             {
                 /* Accept floating point numbers. */
                 if (c == '.') {
-                    c = TOK_NEXTC();
+                    c = tok_nextc(tok);
         fraction:
                     /* Fraction */
                     if (isdigit(c)) {
-                        c = tok_decimal_tail(tok, token);
+                        c = tok_decimal_tail(tok);
                         if (c == 0) {
                             return MAKE_TOKEN(ERRORTOKEN);
                         }
@@ -2150,16 +2158,16 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                   exponent:
                     e = c;
                     /* Exponent part */
-                    c = TOK_NEXTC();
+                    c = tok_nextc(tok);
                     if (c == '+' || c == '-') {
-                        c = TOK_NEXTC();
+                        c = tok_nextc(tok);
                         if (!isdigit(c)) {
                             tok_backup(tok, c);
                             return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal"));
                         }
                     } else if (!isdigit(c)) {
                         tok_backup(tok, c);
-                        if (!verify_end_of_number(tok, token, e, "decimal")) {
+                        if (!verify_end_of_number(tok, e, "decimal")) {
                             return MAKE_TOKEN(ERRORTOKEN);
                         }
                         tok_backup(tok, e);
@@ -2167,7 +2175,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         p_end = tok->cur;
                         return MAKE_TOKEN(NUMBER);
                     }
-                    c = tok_decimal_tail(tok, token);
+                    c = tok_decimal_tail(tok);
                     if (c == 0) {
                         return MAKE_TOKEN(ERRORTOKEN);
                     }
@@ -2175,12 +2183,12 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                 if (c == 'j' || c == 'J') {
                     /* Imaginary part */
         imaginary:
-                    c = TOK_NEXTC();
-                    if (!verify_end_of_number(tok, token, c, "imaginary")) {
+                    c = tok_nextc(tok);
+                    if (!verify_end_of_number(tok, c, "imaginary")) {
                         return MAKE_TOKEN(ERRORTOKEN);
                     }
                 }
-                else if (!verify_end_of_number(tok, token, c, "decimal")) {
+                else if (!verify_end_of_number(tok, c, "decimal")) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
             }
@@ -2204,9 +2212,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         tok->multi_line_start = tok->line_start;
 
         /* Find the quote size and start of string */
-        int after_quote = TOK_NEXTC();
+        int after_quote = tok_nextc(tok);
         if (after_quote == quote) {
-            int after_after_quote = TOK_NEXTC();
+            int after_after_quote = tok_nextc(tok);
             if (after_after_quote == quote) {
                 quote_size = 3;
             }
@@ -2268,9 +2276,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         tok->multi_line_start = tok->line_start;
 
         /* Find the quote size and start of string */
-        c = TOK_NEXTC();
+        c = tok_nextc(tok);
         if (c == quote) {
-            c = TOK_NEXTC();
+            c = tok_nextc(tok);
             if (c == quote) {
                 quote_size = 3;
             }
@@ -2284,7 +2292,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
 
         /* Get rest of string */
         while (end_quote_size != quote_size) {
-            c = TOK_NEXTC();
+            c = tok_nextc(tok);
             if (tok->done == E_DECODE)
                 break;
             if (c == EOF || (quote_size == 1 && c == '\n')) {
@@ -2334,7 +2342,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
             else {
                 end_quote_size = 0;
                 if (c == '\\') {
-                    TOK_NEXTC();  /* skip escaped char */
+                    tok_nextc(tok);  /* skip escaped char */
                 }
             }
         }
@@ -2346,7 +2354,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
 
     /* Line continuation */
     if (c == '\\') {
-        if ((c = tok_continuation_line(tok, token)) == -1) {
+        if ((c = tok_continuation_line(tok)) == -1) {
             return MAKE_TOKEN(ERRORTOKEN);
         }
         tok->cont_line = 1;
@@ -2360,10 +2368,12 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
          * by the `{` case, so for ensuring that we are on the 0th level, we need
          * to adjust it manually */
         int cursor = current_tok->curly_bracket_depth - (c != '{');
-
-        if (cursor == 0 && !update_fstring_expr(tok, token, c)) {
+        if (cursor == 0 && !update_fstring_expr(tok, c)) {
             return MAKE_TOKEN(ENDMARKER);
         }
+        if (cursor == 0 && c != '{' && set_fstring_expr(tok, token, c)) {
+            return MAKE_TOKEN(ERRORTOKEN);
+        }
 
         if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
             current_tok->kind = TOK_FSTRING_MODE;
@@ -2375,10 +2385,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
 
     /* Check for two-character token */
     {
-        int c2 = TOK_NEXTC();
+        int c2 = tok_nextc(tok);
         int current_token = _PyToken_TwoChars(c, c2);
         if (current_token != OP) {
-            int c3 = TOK_NEXTC();
+            int c3 = tok_nextc(tok);
             int current_token3 = _PyToken_ThreeChars(c, c2, c3);
             if (current_token3 != OP) {
                 current_token = current_token3;
@@ -2488,9 +2498,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
 
     // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
     // before it.
-    int start_char = TOK_NEXTC();
+    int start_char = tok_nextc(tok);
     if (start_char == '{') {
-        int peek1 = TOK_NEXTC();
+        int peek1 = tok_nextc(tok);
         tok_backup(tok, peek1);
         tok_backup(tok, start_char);
         if (peek1 != '{') {
@@ -2508,7 +2518,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
 
     // Check if we are at the end of the string
     for (int i = 0; i < current_tok->f_string_quote_size; i++) {
-        int quote = TOK_NEXTC();
+        int quote = tok_nextc(tok);
         if (quote != current_tok->f_string_quote) {
             tok_backup(tok, quote);
             goto f_string_middle;
@@ -2530,7 +2540,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
 f_string_middle:
 
     while (end_quote_size != current_tok->f_string_quote_size) {
-        int c = TOK_NEXTC();
+        int c = tok_nextc(tok);
         if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
             assert(tok->multi_line_start != NULL);
             // shift the tok_state's location into
@@ -2567,7 +2577,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
                 INSIDE_FSTRING_EXPR(current_tok)
         );
         if (c == '{') {
-            int peek = TOK_NEXTC();
+            int peek = tok_nextc(tok);
             if (peek != '{' || in_format_spec) {
                 tok_backup(tok, peek);
                 tok_backup(tok, c);
@@ -2589,7 +2599,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
                 p_end = tok->cur;
                 return MAKE_TOKEN(FSTRING_MIDDLE);
             }
-            int peek = TOK_NEXTC();
+            int peek = tok_nextc(tok);
 
             // The tokenizer can only be in the format spec if we have already completed the expression
             // scanning (indicated by the end of the expression being set) and we are not at the top level
@@ -2607,7 +2617,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
             }
             return MAKE_TOKEN(FSTRING_MIDDLE);
         } else if (c == '\\') {
-            int peek = TOK_NEXTC();
+            int peek = tok_nextc(tok);
             // Special case when the backslash is right before a curly
             // brace. We have to restore and return the control back
             // to the loop for the next iteration.
@@ -2624,7 +2634,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
             if (!current_tok->f_string_raw) {
                 if (peek == 'N') {
                     /* Handle named unicode escapes (\N{BULLET}) */
-                    peek = TOK_NEXTC();
+                    peek = tok_nextc(tok);
                     if (peek == '{') {
                         unicode_escape = 1;
                     } else {

From 326059ea6cba7a7e94fafc53f3e4cf939b26ea9d Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Thu, 27 Apr 2023 00:29:58 +0100
Subject: [PATCH 3/8] fixup! fixup! gh-103656: Transfer f-string buffers to
 parser to avoid use-after-free

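---
Review note (free text below the cut line; not part of the commit message):
this hunk sets parser_token->metadata to NULL before the `if`, so the
Py_DECREF(parser_token->metadata) kept on the _PyArena_AddPyObject() failure
path is now applied to a NULL pointer. Py_DECREF() must not be given NULL;
releasing new_token->metadata there looks like what was intended (to be
confirmed). The line is carried unchanged as context by the later pegen.c
hunks in this series.

 Parser/pegen.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)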

diff --git a/Parser/pegen.c b/Parser/pegen.c
index 97f86e1372f7c0..07d5e9f4a59843 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -155,15 +155,13 @@ initialize_token(Parser *p, Token *parser_token, struct token *new_token, int to
         return -1;
     }
 
+    parser_token->metadata = NULL;
     if (new_token->metadata != NULL) {
-        parser_token->metadata = new_token->metadata;
-        if (_PyArena_AddPyObject(p->arena, parser_token->metadata) < 0) {
+        if (_PyArena_AddPyObject(p->arena, new_token->metadata) < 0) {
             Py_DECREF(parser_token->metadata);
             return -1;
         }
-    }
-    else {
-        parser_token->metadata = NULL;
+        parser_token->metadata = new_token->metadata;
     }
 
     parser_token->level = new_token->level;

From f2754e1cc63e103d4925e22d89ed25291f52b04c Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Thu, 27 Apr 2023 00:53:52 +0100
Subject: [PATCH 4/8] fixup! fixup! fixup! gh-103656: Transfer f-string buffers
 to parser to avoid use-after-free

---
 Parser/pegen.c     | 1 +
 Parser/tokenizer.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Parser/pegen.c b/Parser/pegen.c
index 07d5e9f4a59843..83aad33aa906cd 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -161,6 +161,7 @@ initialize_token(Parser *p, Token *parser_token, struct token *new_token, int to
             Py_DECREF(parser_token->metadata);
             return -1;
         }
+        Py_DECREF(parser_token->metadata);
         parser_token->metadata = new_token->metadata;
     }
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 045136d4c6af62..fb41489a990d8b 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -393,7 +393,7 @@ restore_fstring_buffers(struct tok_state *tok)
 static int
 set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
     assert(token != NULL);
-    assert(c == '{' || c == ':' || c == '!');
+    assert(c == '}' || c == ':' || c == '!');
 
     tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
     PyObject *res = PyUnicode_DecodeUTF8(

From ee801d8cde1c66ca59118ea6ed99aa5d12482bb1 Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Thu, 27 Apr 2023 01:00:33 +0100
Subject: [PATCH 5/8] fixup! fixup! fixup! fixup! gh-103656: Transfer f-string
 buffers to parser to avoid use-after-free

---
 Parser/pegen.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Parser/pegen.c b/Parser/pegen.c
index 83aad33aa906cd..07d5e9f4a59843 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -161,7 +161,6 @@ initialize_token(Parser *p, Token *parser_token, struct token *new_token, int to
             Py_DECREF(parser_token->metadata);
             return -1;
         }
-        Py_DECREF(parser_token->metadata);
         parser_token->metadata = new_token->metadata;
     }
 

From e84a02bacc0183c10d3d905641eb5ad0282a127c Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Thu, 27 Apr 2023 01:26:30 +0100
Subject: [PATCH 6/8] Fix refleaks

---
 Parser/pegen.c        | 10 +++++++---
 Parser/pegen_errors.c |  2 ++
 Parser/tokenizer.c    |  3 +++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/Parser/pegen.c b/Parser/pegen.c
index 07d5e9f4a59843..da410ea84ecb8e 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -162,6 +162,7 @@ initialize_token(Parser *p, Token *parser_token, struct token *new_token, int to
             return -1;
         }
         parser_token->metadata = new_token->metadata;
+        new_token->metadata = NULL;
     }
 
     parser_token->level = new_token->level;
@@ -216,14 +217,14 @@ _PyPegen_fill_token(Parser *p)
         char *tag = PyMem_Malloc(len + 1);
         if (tag == NULL) {
             PyErr_NoMemory();
-            return -1;
+            goto error;
         }
         strncpy(tag, new_token.start, len);
         tag[len] = '\0';
         // Ownership of tag passes to the growable array
         if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
             PyErr_NoMemory();
-            return -1;
+            goto error;
         }
         type = _PyTokenizer_Get(p->tok, &new_token);
     }
@@ -244,11 +245,14 @@ _PyPegen_fill_token(Parser *p)
 
     // Check if we are at the limit of the token array capacity and resize if needed
     if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
-        return -1;
+        goto error;
     }
 
     Token *t = p->tokens[p->fill];
     return initialize_token(p, t, &new_token, type);
+error:
+    Py_XDECREF(new_token.metadata);
+    return -1;
 }
 
 #if defined(Py_DEBUG)
diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c
index e26bad20a27575..1f227da0194e3c 100644
--- a/Parser/pegen_errors.c
+++ b/Parser/pegen_errors.c
@@ -165,6 +165,7 @@ _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
 
     int ret = 0;
     struct token new_token;
+    new_token.metadata = NULL;
 
     for (;;) {
         switch (_PyTokenizer_Get(p->tok, &new_token)) {
@@ -192,6 +193,7 @@ _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
 
 
 exit:
+    Py_XDECREF(new_token.metadata);
     // If we're in an f-string, we want the syntax error in the expression part
     // to propagate, so that tokenizer errors (like expecting '}') that happen afterwards
     // do not swallow it.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index fb41489a990d8b..32f7cfcb4c15bf 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -394,6 +394,9 @@ static int
 set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
     assert(token != NULL);
     assert(c == '}' || c == ':' || c == '!');
+    if (token->metadata) {
+        return 0;
+    }
 
     tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
     PyObject *res = PyUnicode_DecodeUTF8(

From 05293a4fb4d9ab538e60fada83c8b97ee7525b0e Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Thu, 27 Apr 2023 01:34:18 +0100
Subject: [PATCH 7/8] fixup! Fix refleaks

---
 Parser/tokenizer.c | 14 ++++++++++----
 Parser/tokenizer.h |  1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 32f7cfcb4c15bf..6b5478b3a94ab4 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -111,7 +111,7 @@ tok_new(void)
     tok->interactive_underflow = IUNDERFLOW_NORMAL;
     tok->str = NULL;
     tok->report_warnings = 1;
-    tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0};
+    tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0};
     tok->tok_mode_stack_index = 0;
     tok->tok_report_warnings = 1;
 #ifdef Py_DEBUG
@@ -394,11 +394,12 @@ static int
 set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
     assert(token != NULL);
     assert(c == '}' || c == ':' || c == '!');
-    if (token->metadata) {
+    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
+
+    if (!tok_mode->f_string_debug || token->metadata) {
         return 0;
     }
 
-    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
     PyObject *res = PyUnicode_DecodeUTF8(
         tok_mode->last_expr_buffer,
         tok_mode->last_expr_size - tok_mode->last_expr_end,
@@ -2237,7 +2238,6 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
         the_current_tok->kind = TOK_FSTRING_MODE;
         the_current_tok->f_string_quote = quote;
-        the_current_tok->f_string_quote_size = quote_size;
         the_current_tok->f_string_start = tok->start;
         the_current_tok->f_string_multi_line_start = tok->line_start;
         the_current_tok->f_string_start_offset = -1;
@@ -2245,6 +2245,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         the_current_tok->last_expr_buffer = NULL;
         the_current_tok->last_expr_size = 0;
         the_current_tok->last_expr_end = -1;
+        the_current_tok->f_string_debug = 0;
 
         switch (*tok->start) {
             case 'F':
@@ -2468,6 +2469,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
             if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
                 current_tok->curly_bracket_expr_start_depth--;
                 current_tok->kind = TOK_FSTRING_MODE;
+                current_tok->f_string_debug = 0;
             }
         }
         break;
@@ -2481,6 +2483,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         return MAKE_TOKEN(syntaxerror(tok, "invalid non-printable character U+%s", hex));
     }
 
+    if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) {
+        current_tok->f_string_debug = 1;
+    }
+
     /* Punctuation character */
     p_start = tok->start;
     p_end = tok->cur;
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 97b54dc2a625ef..8b4213c4ce3b5a 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -59,6 +59,7 @@ typedef struct _tokenizer_mode {
     Py_ssize_t last_expr_size;
     Py_ssize_t last_expr_end;
     char* last_expr_buffer;
+    int f_string_debug;
 } tokenizer_mode;
 
 /* Tokenizer state */

From 87805024cd787205dc8d7276f6e2e36e33cdfa3d Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Wed, 26 Apr 2023 19:04:01 -0600
Subject: [PATCH 8/8] Add quote_size back in

---
 Parser/tokenizer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 6b5478b3a94ab4..8de0572a1fc459 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -2238,6 +2238,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
         the_current_tok->kind = TOK_FSTRING_MODE;
         the_current_tok->f_string_quote = quote;
+        the_current_tok->f_string_quote_size = quote_size;
         the_current_tok->f_string_start = tok->start;
         the_current_tok->f_string_multi_line_start = tok->line_start;
         the_current_tok->f_string_start_offset = -1;