From 0a825ea03bde7f0ef1a4a17a6d62ba6cb6cade86 Mon Sep 17 00:00:00 2001
From: Batuhan Taskaya <batuhan@python.org>
Date: Mon, 5 Jul 2021 19:48:39 +0300
Subject: [PATCH 1/7] bpo-43950: Specialize tracebacks for subscripts/binary
 ops

---
 Python/traceback.c | 195 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 181 insertions(+), 14 deletions(-)
diff --git a/Python/traceback.c b/Python/traceback.c
index a60f9916424337..745376490efcbf 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -7,6 +7,10 @@
 #include "pycore_interp.h"        // PyInterpreterState.gc
 #include "frameobject.h"          // PyFrame_GetBack()
 #include "pycore_frame.h"         // _PyFrame_GetCode()
+#include "pycore_pyarena.h"       // _PyArena_Free()
+#include "pycore_ast.h"           // asdl_seq_*
+#include "pycore_compile.h"           // asdl_seq_*
+#include "pycore_parser.h"        // _PyParser_ASTFromString
 #include "../Parser/pegen.h"      // _PyPegen_byte_offset_to_character_offset()
 #include "structmember.h"         // PyMemberDef
 #include "osdefs.h"               // SEP
@@ -512,8 +516,147 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i
     return err;
 }
 
+/* AST based Traceback Specialization
+ *
+ * When displaying a new traceback line, for certain syntactical constructs
+ * (e.g a subscript, an arithmetic operation) we try to create a representation
+ * that separates the primary source of error from the rest.
+ * 
+ * Example specialization of BinOp nodes:
+ *  Traceback (most recent call last):
+ *    File "/home/isidentical/cpython/cpython/t.py", line 10, in <module>
+ *      add_values(1, 2, 'x', 3, 4)
+ *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ *    File "/home/isidentical/cpython/cpython/t.py", line 2, in add_values
+ *      return a + b + c + d + e
+ *             ~~~~~~^~~
+ *  TypeError: 'NoneType' object is not subscriptable
+ */
+
+#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
+
+static int
+extract_anchors_from_expr(PyObject *segment, expr_ty expr, int *left_anchor, int *right_anchor)
+{
+    switch (expr->kind) {
+        case BinOp_kind: {
+            PyObject *operator = PyUnicode_Substring(segment, expr->v.BinOp.left->end_col_offset,
+                                                     expr->v.BinOp.right->col_offset);
+            if (!operator) {
+                return -1;
+            }
+
+            const char *operator_str = PyUnicode_AsUTF8(operator);
+            if (!operator_str) {
+                Py_DECREF(operator);
+                return -1;
+            }
+
+            Py_ssize_t i, len = PyUnicode_GET_LENGTH(operator);
+            for (i = 0; i < len; i++) {
+                if (IS_WHITESPACE(operator_str[i])) {
+                    continue;
+                }
+
+                int index = Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
+                *left_anchor = expr->v.BinOp.left->end_col_offset + index;
+                *right_anchor = expr->v.BinOp.left->end_col_offset + index + 1;
+                if (i + 1 < len && !IS_WHITESPACE(operator_str[i + 1])) {
+                    ++*right_anchor;
+                }
+                break;
+            }
+            Py_DECREF(operator);
+            return 1;
+        }
+        case Subscript_kind: {
+            *left_anchor = expr->v.Subscript.value->end_col_offset;
+            *right_anchor = expr->v.Subscript.slice->end_col_offset + 1;
+            return 1;
+        }
+        default:
+            return 0;
+    }
+}
+
+static int
+extract_anchors_from_stmt(PyObject *segment, stmt_ty statement, int *left_anchor, int *right_anchor)
+{
+    switch (statement->kind) {
+        case Expr_kind: {
+            return extract_anchors_from_expr(segment, statement->v.Expr.value, left_anchor, right_anchor);
+        }
+        default:
+            return 0;
+    }
+}
+
+static int
+extract_anchors_from_line(PyObject *filename, PyObject *line,
+                          Py_ssize_t start_offset, Py_ssize_t end_offset,
+                          int *left_anchor, int *right_anchor)
+{
+    int res = -1;
+    PyArena *arena = NULL;
+    PyObject *segment = PyUnicode_Substring(line, start_offset, end_offset);
+    if (!segment) {
+        goto done;
+    }
+
+    const char *segment_str = PyUnicode_AsUTF8(segment);
+    if (!segment) {
+        goto done;
+    }
+
+    arena = _PyArena_New();
+    if (!arena) {
+        goto done;
+    }
+
+    PyCompilerFlags flags = _PyCompilerFlags_INIT;
+
+    _PyASTOptimizeState state;
+    state.optimize = _Py_GetConfig()->optimization_level;
+    state.ff_features = 0;
+
+    mod_ty module = _PyParser_ASTFromString(segment_str, filename, Py_file_input,
+                                            &flags, arena);
+    if (!module) {
+        goto done;
+    }
+    if (!_PyAST_Optimize(module, arena, &state)) {
+        goto done;
+    }
+
+    assert(module->kind == Module_kind);
+    if (asdl_seq_LEN(module->v.Module.body) == 1) {
+        stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0);
+        res = extract_anchors_from_stmt(segment, statement, left_anchor, right_anchor);
+    } else {
+        res = 0;
+    }
+
+done:
+    Py_XDECREF(segment);
+    if (arena) {
+        _PyArena_Free(arena);
+    }
+    return res;
+}
+
 #define _TRACEBACK_SOURCE_LINE_INDENT 4
 
+static inline int
+ignore_source_errors(void) {
+    if (PyErr_Occurred()) {
+        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
+            return -1;
+        }
+        PyErr_Clear();
+    }
+    return 0;
+}
+
 static int
 tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno,
                PyFrameObject *frame, PyObject *name)
@@ -544,7 +687,7 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
         int start_col_byte_offset;
         int end_col_byte_offset;
         if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
-                                 &end_line, &end_col_byte_offset)) {
+                                  &end_line, &end_col_byte_offset)) {
             goto done;
         }
         if (start_line != end_line) {
@@ -554,29 +697,53 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
         if (start_col_byte_offset < 0 || end_col_byte_offset < 0) {
             goto done;
         }
+
         // Convert the utf-8 byte offset to the actual character offset so we
         // print the right number of carets.
-        Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
-        Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
+        Py_ssize_t start_offset = (Py_ssize_t)start_col_byte_offset;
+        Py_ssize_t end_offset = (Py_ssize_t)end_col_byte_offset;
 
-        char offset = truncation;
-        while (++offset <= start_offset) {
-            err = PyFile_WriteString(" ", f);
-            if (err < 0) {
-                goto done;
+        if (source_line) {
+            start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
+            end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
+        }
+
+        const char *primary, *secondary;
+        primary = secondary = "^";
+
+        int left_end_offset, right_start_offset;
+        left_end_offset = right_start_offset = Py_SAFE_DOWNCAST(end_offset, Py_ssize_t, int) - Py_SAFE_DOWNCAST(start_offset, Py_ssize_t, int);
+
+        if (source_line) {
+            int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
+                                                &left_end_offset, &right_start_offset);
+            if (res < 0) {
+                err = ignore_source_errors();
+                if (err < 0) {
+                    goto done;
+                }
+            } else if (res > 0) {
+                primary = "^";
+                secondary = "~";
             }
         }
-        while (++offset <= end_offset + 1) {
-            err = PyFile_WriteString("^", f);
-            if (err < 0) {
-                goto done;
+
+        char offset = truncation;
+        while (++offset <= end_offset) {
+            if (offset <= start_offset) {
+                err = PyFile_WriteString(" ", f);
+            } else if (offset <= left_end_offset + start_offset) {
+                err = PyFile_WriteString(secondary, f);
+            } else if (offset <= right_start_offset + start_offset) {
+                err = PyFile_WriteString(primary, f);
+            } else {
+                err = PyFile_WriteString(secondary, f);
             }
         }
         err = PyFile_WriteString("\n", f);
     }
-
     else {
-        PyErr_Clear();
+        err = ignore_source_errors();
     }
     
 done:

From d79d365a8b08eb4197502f4fc86a2bc9bf8c2c34 Mon Sep 17 00:00:00 2001
From: Ammar Askar <ammar@ammaraskar.com>
Date: Sat, 10 Jul 2021 12:06:35 +0300
Subject: [PATCH 2/7] Port new specializations to traceback.py

Co-authored-by: Batuhan Taskaya <batuhanosmantaskaya@gmail.com>
---
 Doc/library/traceback.rst  |  4 +--
 Lib/test/test_traceback.py | 57 +++++++++++++++++++++++++++++++++++++-
 Lib/traceback.py           | 49 +++++++++++++++++++++++++++++++-
 3 files changed, 106 insertions(+), 4 deletions(-)

diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst
index 1961b9a435bd35..be1f43ea953edb 100644
--- a/Doc/library/traceback.rst
+++ b/Doc/library/traceback.rst
@@ -473,7 +473,7 @@ The output for the example would look similar to this:
    ['Traceback (most recent call last):\n',
     '  File "<doctest default[0]>", line 10, in <module>\n    lumberjack()\n    ^^^^^^^^^^^^\n',
     '  File "<doctest default[0]>", line 4, in lumberjack\n    bright_side_of_death()\n    ^^^^^^^^^^^^^^^^^^^^^^\n',
-    '  File "<doctest default[0]>", line 7, in bright_side_of_death\n    return tuple()[0]\n           ^^^^^^^^^^\n',
+    '  File "<doctest default[0]>", line 7, in bright_side_of_death\n    return tuple()[0]\n           ~~~~~~~^^^\n',
     'IndexError: tuple index out of range\n']
    *** extract_tb:
    [<FrameSummary file <doctest...>, line 10 in <module>>,
@@ -482,7 +482,7 @@ The output for the example would look similar to this:
    *** format_tb:
    ['  File "<doctest default[0]>", line 10, in <module>\n    lumberjack()\n    ^^^^^^^^^^^^\n',
     '  File "<doctest default[0]>", line 4, in lumberjack\n    bright_side_of_death()\n    ^^^^^^^^^^^^^^^^^^^^^^\n',
-    '  File "<doctest default[0]>", line 7, in bright_side_of_death\n    return tuple()[0]\n           ^^^^^^^^^^\n']
+    '  File "<doctest default[0]>", line 7, in bright_side_of_death\n    return tuple()[0]\n           ~~~~~~~^^^\n']
    *** tb_lineno: 10
 
 
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 50ebccef82a3fa..3c9916a23d2240 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -406,6 +406,61 @@ def f_with_multiline():
         result_lines = self.get_exception(f_with_multiline)
         self.assertEqual(result_lines, expected_f.splitlines())
 
+    def test_caret_for_binary_operators(self):
+        def f_with_binary_operator():
+            divisor = 20
+            return 10 + divisor / 0 + 30
+
+        lineno_f = f_with_binary_operator.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ^^^^^^^^^^\n'
+            f'  File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n'
+            '    return 10 + divisor / 0 + 30\n'
+            '                ~~~~~~~~^~~\n'
+        )
+        result_lines = self.get_exception(f_with_binary_operator)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
+    def test_caret_for_binary_operators_two_char(self):
+        def f_with_binary_operator():
+            divisor = 20
+            return 10 + divisor // 0 + 30
+
+        lineno_f = f_with_binary_operator.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ^^^^^^^^^^\n'
+            f'  File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n'
+            '    return 10 + divisor // 0 + 30\n'
+            '                ~~~~~~~~^^~~\n'
+        )
+        result_lines = self.get_exception(f_with_binary_operator)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
+    def test_caret_for_subscript(self):
+        def f_with_subscript():
+            some_dict = {'x': {'y': None}}
+            return some_dict['x']['y']['z']
+
+        lineno_f = f_with_subscript.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ^^^^^^^^^^\n'
+            f'  File "{__file__}", line {lineno_f+2}, in f_with_subscript\n'
+            "    return some_dict['x']['y']['z']\n"
+            '           ~~~~~~~~~~~~~~~~~~~^^^^^\n'
+        )
+        result_lines = self.get_exception(f_with_subscript)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
+
 
 @cpython_only
 @requires_debug_ranges()
@@ -1615,7 +1670,7 @@ def f():
         self.assertEqual(
             output.getvalue().split('\n')[-5:],
             ['    x/0',
-             '    ^^^',
+             '    ~^~',
              '    x = 12',
              'ZeroDivisionError: division by zero',
              ''])
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 7cb124188aca8a..35f02e827d3fc6 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -494,9 +494,23 @@ def format(self):
                     colno = _byte_offset_to_character_offset(frame._original_line, frame.colno)
                     end_colno = _byte_offset_to_character_offset(frame._original_line, frame.end_colno)
 
+                    try:
+                        anchors = _extract_caret_anchors_from_line_segment(
+                            frame._original_line[colno - 1:end_colno]
+                        )
+                    except Exception:
+                        anchors = None
+
                     row.append('    ')
                     row.append(' ' * (colno - stripped_characters))
-                    row.append('^' * (end_colno - colno))
+
+                    if anchors:
+                        row.append('~' * (anchors[0]))
+                        row.append('^' * (anchors[1] - anchors[0]))
+                        row.append('~' * (end_colno - colno - anchors[1]))
+                    else:
+                        row.append('^' * (end_colno - colno))
+
                     row.append('\n')
 
             if frame.locals:
@@ -520,6 +534,39 @@ def _byte_offset_to_character_offset(str, offset):
     return len(as_utf8[:offset + 1].decode("utf-8"))
 
 
+def _extract_caret_anchors_from_line_segment(segment):
+    import ast
+
+    try:
+        tree = ast.parse(segment)
+    except SyntaxError:
+        return None
+
+    if len(tree.body) != 1:
+        return None
+
+    statement = tree.body[0]
+    match statement:
+        case ast.Expr(expr):
+            match expr:
+                case ast.BinOp():
+                    operator_str = segment[expr.left.end_col_offset:expr.right.col_offset]
+                    operator_offset = len(operator_str) - len(operator_str.lstrip())
+
+                    left_anchor = expr.left.end_col_offset + operator_offset
+                    right_anchor = left_anchor + 1
+                    if (
+                        operator_offset + 1 < len(operator_str)
+                        and not operator_str[operator_offset + 1].isspace()
+                    ):
+                        right_anchor += 1
+                    return left_anchor, right_anchor
+                case ast.Subscript():
+                    return expr.value.end_col_offset, expr.slice.end_col_offset + 1
+
+    return None
+
+
 class TracebackException:
     """An exception ready for rendering.
 

From 26430d41967329edc8f2f09abe1f57b94b261799 Mon Sep 17 00:00:00 2001
From: Batuhan Taskaya <batuhan@python.org>
Date: Sat, 10 Jul 2021 20:52:59 +0300
Subject: [PATCH 3/7] pass segmen_str around

---
 Python/traceback.c | 43 ++++++++++++++++---------------------------
 1 file changed, 16 insertions(+), 27 deletions(-)

diff --git a/Python/traceback.c b/Python/traceback.c
index 745376490efcbf..286f24cac65c00 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -9,7 +9,7 @@
 #include "pycore_frame.h"         // _PyFrame_GetCode()
 #include "pycore_pyarena.h"       // _PyArena_Free()
 #include "pycore_ast.h"           // asdl_seq_*
-#include "pycore_compile.h"           // asdl_seq_*
+#include "pycore_compile.h"       // _PyAST_Optimize
 #include "pycore_parser.h"        // _PyParser_ASTFromString
 #include "../Parser/pegen.h"      // _PyPegen_byte_offset_to_character_offset()
 #include "structmember.h"         // PyMemberDef
@@ -536,37 +536,26 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i
 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
 
 static int
-extract_anchors_from_expr(PyObject *segment, expr_ty expr, int *left_anchor, int *right_anchor)
+extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_anchor, int *right_anchor)
 {
     switch (expr->kind) {
         case BinOp_kind: {
-            PyObject *operator = PyUnicode_Substring(segment, expr->v.BinOp.left->end_col_offset,
-                                                     expr->v.BinOp.right->col_offset);
-            if (!operator) {
-                return -1;
-            }
-
-            const char *operator_str = PyUnicode_AsUTF8(operator);
-            if (!operator_str) {
-                Py_DECREF(operator);
-                return -1;
-            }
-
-            Py_ssize_t i, len = PyUnicode_GET_LENGTH(operator);
-            for (i = 0; i < len; i++) {
-                if (IS_WHITESPACE(operator_str[i])) {
+            expr_ty left = expr->v.BinOp.left;
+            expr_ty right = expr->v.BinOp.right;
+            for (int i = left->end_col_offset + 1; i < right->col_offset; i++) {
+                if (IS_WHITESPACE(segment_str[i])) {
                     continue;
                 }
 
-                int index = Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
-                *left_anchor = expr->v.BinOp.left->end_col_offset + index;
-                *right_anchor = expr->v.BinOp.left->end_col_offset + index + 1;
-                if (i + 1 < len && !IS_WHITESPACE(operator_str[i + 1])) {
+                *left_anchor = i;
+                *right_anchor = i + 1;
+
+                // Check whether if this a two-character operator (e.g //)
+                if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) {
                     ++*right_anchor;
                 }
                 break;
             }
-            Py_DECREF(operator);
             return 1;
         }
         case Subscript_kind: {
@@ -580,11 +569,11 @@ extract_anchors_from_expr(PyObject *segment, expr_ty expr, int *left_anchor, int
 }
 
 static int
-extract_anchors_from_stmt(PyObject *segment, stmt_ty statement, int *left_anchor, int *right_anchor)
+extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, int *left_anchor, int *right_anchor)
 {
     switch (statement->kind) {
         case Expr_kind: {
-            return extract_anchors_from_expr(segment, statement->v.Expr.value, left_anchor, right_anchor);
+            return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor);
         }
         default:
             return 0;
@@ -631,7 +620,7 @@ extract_anchors_from_line(PyObject *filename, PyObject *line,
     assert(module->kind == Module_kind);
     if (asdl_seq_LEN(module->v.Module.body) == 1) {
         stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0);
-        res = extract_anchors_from_stmt(segment, statement, left_anchor, right_anchor);
+        res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor);
     } else {
         res = 0;
     }
@@ -711,8 +700,8 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
         const char *primary, *secondary;
         primary = secondary = "^";
 
-        int left_end_offset, right_start_offset;
-        left_end_offset = right_start_offset = Py_SAFE_DOWNCAST(end_offset, Py_ssize_t, int) - Py_SAFE_DOWNCAST(start_offset, Py_ssize_t, int);
+        int left_end_offset = Py_SAFE_DOWNCAST(end_offset, Py_ssize_t, int) - Py_SAFE_DOWNCAST(start_offset, Py_ssize_t, int);
+        int right_start_offset = left_end_offset;
 
         if (source_line) {
             int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,

From 081738df614f1e533901945aaa4ca0b65c54823d Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Mon, 12 Jul 2021 03:57:48 +0100
Subject: [PATCH 4/7] Refactor and simplify the traceback code

---
 Python/traceback.c | 169 +++++++++++++++++++++++++++------------------
 1 file changed, 100 insertions(+), 69 deletions(-)

diff --git a/Python/traceback.c b/Python/traceback.c
index 286f24cac65c00..9add00e0866698 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -536,9 +536,10 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i
 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
 
 static int
-extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_anchor, int *right_anchor)
+extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
+                          char** primary_error_char, char** secondary_error_char)
 {
-    switch (expr->kind) {
+switch (expr->kind) {
         case BinOp_kind: {
             expr_ty left = expr->v.BinOp.left;
             expr_ty right = expr->v.BinOp.right;
@@ -554,6 +555,10 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_ancho
                 if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) {
                     ++*right_anchor;
                 }
+
+                // Set the error characters
+                *primary_error_char = "~";
+                *secondary_error_char = "^";
                 break;
             }
             return 1;
@@ -561,6 +566,10 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_ancho
         case Subscript_kind: {
             *left_anchor = expr->v.Subscript.value->end_col_offset;
             *right_anchor = expr->v.Subscript.slice->end_col_offset + 1;
+
+            // Set the error characters
+            *primary_error_char = "~";
+            *secondary_error_char = "^";
             return 1;
         }
         default:
@@ -569,11 +578,13 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_ancho
 }
 
 static int
-extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, int *left_anchor, int *right_anchor)
+extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
+                          char** primary_error_char, char** secondary_error_char)
 {
     switch (statement->kind) {
         case Expr_kind: {
-            return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor);
+            return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor,
+                                             primary_error_char, secondary_error_char);
         }
         default:
             return 0;
@@ -583,7 +594,8 @@ extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, int *left_
 static int
 extract_anchors_from_line(PyObject *filename, PyObject *line,
                           Py_ssize_t start_offset, Py_ssize_t end_offset,
-                          int *left_anchor, int *right_anchor)
+                          Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
+                          char** primary_error_char, char** secondary_error_char)
 {
     int res = -1;
     PyArena *arena = NULL;
@@ -620,12 +632,17 @@ extract_anchors_from_line(PyObject *filename, PyObject *line,
     assert(module->kind == Module_kind);
     if (asdl_seq_LEN(module->v.Module.body) == 1) {
         stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0);
-        res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor);
+        res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor,
+                                        primary_error_char, secondary_error_char);
     } else {
         res = 0;
     }
 
 done:
+    if (res > 0) {
+        *left_anchor += start_offset;
+        *right_anchor += start_offset;
+    }
     Py_XDECREF(segment);
     if (arena) {
         _PyArena_Free(arena);
@@ -646,6 +663,25 @@ ignore_source_errors(void) {
     return 0;
 }
 
+static inline int
+print_error_location_carets(PyObject *f, int offset, Py_ssize_t start_offset, Py_ssize_t end_offset,
+                            Py_ssize_t right_start_offset, Py_ssize_t left_end_offset,
+                            const char *primary, const char *secondary) {
+    int err = 0;
+    int special_chars = (left_end_offset != -1 || right_start_offset != -1);
+    while (++offset <= end_offset) {
+        if (offset <= start_offset || offset > end_offset) {
+            err = PyFile_WriteString(" ", f);
+        } else if (special_chars && left_end_offset < offset && offset <= right_start_offset) {
+            err = PyFile_WriteString(secondary, f);
+        } else {
+            err = PyFile_WriteString(primary, f);
+        }
+    }
+    err = PyFile_WriteString("\n", f);
+    return err;
+}
+
 static int
 tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno,
                PyFrameObject *frame, PyObject *name)
@@ -665,76 +701,71 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
         return err;
     int truncation = _TRACEBACK_SOURCE_LINE_INDENT;
     PyObject* source_line = NULL;
-    /* ignore errors since we can't report them, can we? */
-    if (!_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT,
-                               &truncation, &source_line)) {
-        int code_offset = tb->tb_lasti;
-        PyCodeObject* code = _PyFrame_GetCode(frame);
-
-        int start_line;
-        int end_line;
-        int start_col_byte_offset;
-        int end_col_byte_offset;
-        if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
-                                  &end_line, &end_col_byte_offset)) {
-            goto done;
-        }
-        if (start_line != end_line) {
-            goto done;
-        }
 
-        if (start_col_byte_offset < 0 || end_col_byte_offset < 0) {
-            goto done;
-        }
-
-        // Convert the utf-8 byte offset to the actual character offset so we
-        // print the right number of carets.
-        Py_ssize_t start_offset = (Py_ssize_t)start_col_byte_offset;
-        Py_ssize_t end_offset = (Py_ssize_t)end_col_byte_offset;
-
-        if (source_line) {
-            start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
-            end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
-        }
+    if (_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT,
+                               &truncation, &source_line) != 0) {
+        /* ignore errors since we can't report them, can we? */
+        err = ignore_source_errors();
+        goto done;
+    }
 
-        const char *primary, *secondary;
-        primary = secondary = "^";
+    int code_offset = tb->tb_lasti;
+    PyCodeObject* code = _PyFrame_GetCode(frame);
 
-        int left_end_offset = Py_SAFE_DOWNCAST(end_offset, Py_ssize_t, int) - Py_SAFE_DOWNCAST(start_offset, Py_ssize_t, int);
-        int right_start_offset = left_end_offset;
+    int start_line;
+    int end_line;
+    int start_col_byte_offset;
+    int end_col_byte_offset;
+    if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
+                              &end_line, &end_col_byte_offset)) {
+        goto done;
+    }
+    if (start_line != end_line) {
+        goto done;
+    }
 
-        if (source_line) {
-            int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
-                                                &left_end_offset, &right_start_offset);
-            if (res < 0) {
-                err = ignore_source_errors();
-                if (err < 0) {
-                    goto done;
-                }
-            } else if (res > 0) {
-                primary = "^";
-                secondary = "~";
-            }
-        }
+    if (start_col_byte_offset < 0 || end_col_byte_offset < 0) {
+        goto done;
+    }
 
-        char offset = truncation;
-        while (++offset <= end_offset) {
-            if (offset <= start_offset) {
-                err = PyFile_WriteString(" ", f);
-            } else if (offset <= left_end_offset + start_offset) {
-                err = PyFile_WriteString(secondary, f);
-            } else if (offset <= right_start_offset + start_offset) {
-                err = PyFile_WriteString(primary, f);
-            } else {
-                err = PyFile_WriteString(secondary, f);
-            }
+    // When displaying errors, we will use the following generic structure:
+    //
+    //  ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE
+    //        ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~
+    //        |              |-> left_end_offset     |                  |-> left_offset
+    //        |-> start_offset                       |-> right_start_offset
+    //
+    // In general we will only have (start_offset, end_offset) but we can gather more information
+    // by analyzing the AST of the text between *start_offset* and *end_offset*. If this succeeds
+    // we could get *left_end_offset* and *right_start_offset* and some selection of characters for
+    // the different ranges (primary_error_char and secondary_error_char). If we cannot obtain the
+    // AST information or we cannot identify special ranges within it, then left_end_offset and
+    // right_end_offset will be set to -1.
+
+    // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
+    Py_ssize_t start_offset = (Py_ssize_t)start_col_byte_offset;
+    Py_ssize_t end_offset = (Py_ssize_t)end_col_byte_offset;
+    Py_ssize_t left_end_offset = -1;
+    Py_ssize_t right_start_offset = -1;
+
+    char *primary_error_char = "^";
+    char *secondary_error_char = primary_error_char;
+
+    if (source_line) {
+        start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
+        end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
+        int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
+                                            &left_end_offset, &right_start_offset,
+                                            &primary_error_char, &secondary_error_char);
+        if (res < 0 && ignore_source_errors() < 0) {
+            goto done;
         }
-        err = PyFile_WriteString("\n", f);
     }
-    else {
-        err = ignore_source_errors();
-    }
-    
+
+    err = print_error_location_carets(f, truncation, start_offset, end_offset,
+                                      right_start_offset, left_end_offset,
+                                      primary_error_char, secondary_error_char);
+
 done:
     Py_XDECREF(source_line);
     return err;

From 045c49184b082ed7476bd72f454b910d435fc278 Mon Sep 17 00:00:00 2001
From: Batuhan Taskaya <batuhan@python.org>
Date: Mon, 12 Jul 2021 20:14:40 +0300
Subject: [PATCH 5/7] make amendments

---
 Lib/test/test_traceback.py | 27 ++++++++++++++++++++++++++-
 Lib/traceback.py           | 21 ++++++++++++++++-----
 Python/traceback.c         | 21 +++++++++------------
 3 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 3c9916a23d2240..82dce41fa8c13e 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -6,15 +6,18 @@
 import sys
 import inspect
 import unittest
+import tempfile
 import re
 from test import support
 from test.support import (Error, captured_output, cpython_only, ALWAYS_EQ,
                           requires_debug_ranges, has_no_debug_ranges)
 from test.support.os_helper import TESTFN, unlink
 from test.support.script_helper import assert_python_ok, assert_python_failure
-import textwrap
 
+import os
+import textwrap
 import traceback
+from functools import partial
 
 
 test_code = namedtuple('code', ['co_filename', 'co_name'])
@@ -460,7 +463,29 @@ def f_with_subscript():
         result_lines = self.get_exception(f_with_subscript)
         self.assertEqual(result_lines, expected_error.splitlines())
 
+    def test_traceback_specialization_with_syntax_error(self):
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as file:
+            bytecode = compile("1 / 0 / 1 / 2\n", file.name, "exec")
+
+            # make the file invalid
+            file.write("1 $ 0 / 1 / 2\n")
+            file.flush()
 
+        func = partial(exec, bytecode)
+        result_lines = self.get_exception(func)
+        os.unlink(file.name)
+
+        lineno_f = bytecode.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ^^^^^^^^^^\n'
+            f'  File "{file.name}", line {lineno_f}, in <module>\n'
+            "    1 $ 0 / 1 / 2\n"
+            '    ^^^^^\n'
+        )
+        self.assertEqual(result_lines, expected_error.splitlines())
 
 @cpython_only
 @requires_debug_ranges()
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 35f02e827d3fc6..ec5e20d431feb8 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -505,9 +505,9 @@ def format(self):
                     row.append(' ' * (colno - stripped_characters))
 
                     if anchors:
-                        row.append('~' * (anchors[0]))
-                        row.append('^' * (anchors[1] - anchors[0]))
-                        row.append('~' * (end_colno - colno - anchors[1]))
+                        row.append(anchors.primary_char * (anchors.left_end_offset))
+                        row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
+                        row.append(anchors.primary_char * (end_colno - colno - anchors.right_start_offset))
                     else:
                         row.append('^' * (end_colno - colno))
 
@@ -534,6 +534,17 @@ def _byte_offset_to_character_offset(str, offset):
     return len(as_utf8[:offset + 1].decode("utf-8"))
 
 
+_Anchors = collections.namedtuple(
+    "_Anchors",
+    [
+        "left_end_offset",
+        "right_start_offset",
+        "primary_char",
+        "secondary_char",
+    ],
+    defaults=["~", "^"]
+)
+
 def _extract_caret_anchors_from_line_segment(segment):
     import ast
 
@@ -560,9 +571,9 @@ def _extract_caret_anchors_from_line_segment(segment):
                         and not operator_str[operator_offset + 1].isspace()
                     ):
                         right_anchor += 1
-                    return left_anchor, right_anchor
+                    return _Anchors(left_anchor, right_anchor)
                 case ast.Subscript():
-                    return expr.value.end_col_offset, expr.slice.end_col_offset + 1
+                    return _Anchors(expr.value.end_col_offset, expr.slice.end_col_offset + 1)
 
     return None
 
diff --git a/Python/traceback.c b/Python/traceback.c
index 9add00e0866698..199d3ea7596bf8 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -539,7 +539,7 @@ static int
 extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
                           char** primary_error_char, char** secondary_error_char)
 {
-switch (expr->kind) {
+    switch (expr->kind) {
         case BinOp_kind: {
             expr_ty left = expr->v.BinOp.left;
             expr_ty right = expr->v.BinOp.right;
@@ -743,23 +743,20 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     // right_end_offset will be set to -1.
 
     // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
-    Py_ssize_t start_offset = (Py_ssize_t)start_col_byte_offset;
-    Py_ssize_t end_offset = (Py_ssize_t)end_col_byte_offset;
+    assert(source_line);
+    Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
+    Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
     Py_ssize_t left_end_offset = -1;
     Py_ssize_t right_start_offset = -1;
 
     char *primary_error_char = "^";
     char *secondary_error_char = primary_error_char;
 
-    if (source_line) {
-        start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
-        end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
-        int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
-                                            &left_end_offset, &right_start_offset,
-                                            &primary_error_char, &secondary_error_char);
-        if (res < 0 && ignore_source_errors() < 0) {
-            goto done;
-        }
+    int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
+                                        &left_end_offset, &right_start_offset,
+                                        &primary_error_char, &secondary_error_char);
+    if (res < 0 && ignore_source_errors() < 0) {
+        goto done;
     }
 
     err = print_error_location_carets(f, truncation, start_offset, end_offset,

From 012ae63a7863b2c2cc07ce7204d46dab581d50e5 Mon Sep 17 00:00:00 2001
From: Ammar Askar <ammar@ammaraskar.com>
Date: Mon, 12 Jul 2021 14:14:51 -0400
Subject: [PATCH 6/7] Use TESTFN instead of NamedTemporaryFile

---
 Lib/test/test_traceback.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 82dce41fa8c13e..0c1af090544f13 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -464,16 +464,15 @@ def f_with_subscript():
         self.assertEqual(result_lines, expected_error.splitlines())
 
     def test_traceback_specialization_with_syntax_error(self):
-        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as file:
-            bytecode = compile("1 / 0 / 1 / 2\n", file.name, "exec")
+        bytecode = compile("1 / 0 / 1 / 2\n", TESTFN, "exec")
 
-            # make the file invalid
+        with open(TESTFN, "w") as file:
+            # make the file's contents invalid
             file.write("1 $ 0 / 1 / 2\n")
-            file.flush()
+        self.addCleanup(unlink, TESTFN)
 
         func = partial(exec, bytecode)
         result_lines = self.get_exception(func)
-        os.unlink(file.name)
 
         lineno_f = bytecode.co_firstlineno
         expected_error = (
@@ -481,7 +480,7 @@ def test_traceback_specialization_with_syntax_error(self):
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
             '    ^^^^^^^^^^\n'
-            f'  File "{file.name}", line {lineno_f}, in <module>\n'
+            f'  File "{TESTFN}", line {lineno_f}, in <module>\n'
             "    1 $ 0 / 1 / 2\n"
             '    ^^^^^\n'
         )

From 825e3b09c70ae2716cf85a31218cf13a1ec91288 Mon Sep 17 00:00:00 2001
From: Ammar Askar <ammar@ammaraskar.com>
Date: Mon, 12 Jul 2021 14:15:50 -0400
Subject: [PATCH 7/7] Remove tempfile import

---
 Lib/test/test_traceback.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 0c1af090544f13..8baf38c1afd5d6 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -6,7 +6,6 @@
 import sys
 import inspect
 import unittest
-import tempfile
 import re
 from test import support
 from test.support import (Error, captured_output, cpython_only, ALWAYS_EQ,