From 0a825ea03bde7f0ef1a4a17a6d62ba6cb6cade86 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Mon, 5 Jul 2021 19:48:39 +0300 Subject: [PATCH 1/7] bpo-43950: Specialize tracebacks for subscripts/binary ops --- Python/traceback.c | 195 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 181 insertions(+), 14 deletions(-) diff --git a/Python/traceback.c b/Python/traceback.c index a60f9916424337..745376490efcbf 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -7,6 +7,10 @@ #include "pycore_interp.h" // PyInterpreterState.gc #include "frameobject.h" // PyFrame_GetBack() #include "pycore_frame.h" // _PyFrame_GetCode() +#include "pycore_pyarena.h" // _PyArena_Free() +#include "pycore_ast.h" // asdl_seq_* +#include "pycore_compile.h" // asdl_seq_* +#include "pycore_parser.h" // _PyParser_ASTFromString #include "../Parser/pegen.h" // _PyPegen_byte_offset_to_character_offset() #include "structmember.h" // PyMemberDef #include "osdefs.h" // SEP @@ -512,8 +516,147 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i return err; } +/* AST based Traceback Specialization + * + * When displaying a new traceback line, for certain syntactical constructs + * (e.g a subscript, an arithmetic operation) we try to create a representation + * that separates the primary source of error from the rest. + * + * Example specialization of BinOp nodes: + * Traceback (most recent call last): + * File "/home/isidentical/cpython/cpython/t.py", line 10, in + * add_values(1, 2, 'x', 3, 4) + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * File "/home/isidentical/cpython/cpython/t.py", line 2, in add_values + * return a + b + c + d + e + * ~~~~~~^~~ + * TypeError: 'NoneType' object is not subscriptable + */ + +#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f')) + +static int +extract_anchors_from_expr(PyObject *segment, expr_ty expr, int *left_anchor, int *right_anchor) +{ + switch (expr->kind) { + case BinOp_kind: { + PyObject *operator = PyUnicode_Substring(segment, expr->v.BinOp.left->end_col_offset, + expr->v.BinOp.right->col_offset); + if (!operator) { + return -1; + } + + const char *operator_str = PyUnicode_AsUTF8(operator); + if (!operator_str) { + Py_DECREF(operator); + return -1; + } + + Py_ssize_t i, len = PyUnicode_GET_LENGTH(operator); + for (i = 0; i < len; i++) { + if (IS_WHITESPACE(operator_str[i])) { + continue; + } + + int index = Py_SAFE_DOWNCAST(i, Py_ssize_t, int); + *left_anchor = expr->v.BinOp.left->end_col_offset + index; + *right_anchor = expr->v.BinOp.left->end_col_offset + index + 1; + if (i + 1 < len && !IS_WHITESPACE(operator_str[i + 1])) { + ++*right_anchor; + } + break; + } + Py_DECREF(operator); + return 1; + } + case Subscript_kind: { + *left_anchor = expr->v.Subscript.value->end_col_offset; + *right_anchor = expr->v.Subscript.slice->end_col_offset + 1; + return 1; + } + default: + return 0; + } +} + +static int +extract_anchors_from_stmt(PyObject *segment, stmt_ty statement, int *left_anchor, int *right_anchor) +{ + switch (statement->kind) { + case Expr_kind: { + return extract_anchors_from_expr(segment, statement->v.Expr.value, left_anchor, right_anchor); + } + default: + return 0; + } +} + +static int +extract_anchors_from_line(PyObject *filename, PyObject *line, + Py_ssize_t start_offset, Py_ssize_t end_offset, + int *left_anchor, int *right_anchor) +{ + int res = -1; + PyArena *arena = NULL; + PyObject *segment = PyUnicode_Substring(line, start_offset, end_offset); + if (!segment) { + goto done; + } + + const char *segment_str = PyUnicode_AsUTF8(segment); + if (!segment) { + goto done; + } + + arena = _PyArena_New(); + if (!arena) { + goto done; + } + + PyCompilerFlags flags = _PyCompilerFlags_INIT; + + _PyASTOptimizeState state; + state.optimize = _Py_GetConfig()->optimization_level; + state.ff_features = 0; + + mod_ty module = _PyParser_ASTFromString(segment_str, filename, Py_file_input, + &flags, arena); + if (!module) { + goto done; + } + if (!_PyAST_Optimize(module, arena, &state)) { + goto done; + } + + assert(module->kind == Module_kind); + if (asdl_seq_LEN(module->v.Module.body) == 1) { + stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0); + res = extract_anchors_from_stmt(segment, statement, left_anchor, right_anchor); + } else { + res = 0; + } + +done: + Py_XDECREF(segment); + if (arena) { + _PyArena_Free(arena); + } + return res; +} + #define _TRACEBACK_SOURCE_LINE_INDENT 4 +static inline int +ignore_source_errors(void) { + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) { + return -1; + } + PyErr_Clear(); + } + return 0; +} + static int tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno, PyFrameObject *frame, PyObject *name) @@ -544,7 +687,7 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen int start_col_byte_offset; int end_col_byte_offset; if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset, - &end_line, &end_col_byte_offset)) { + &end_line, &end_col_byte_offset)) { goto done; } if (start_line != end_line) { @@ -554,29 +697,53 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen if (start_col_byte_offset < 0 || end_col_byte_offset < 0) { goto done; } + // Convert the utf-8 byte offset to the actual character offset so we // print the right number of carets. - Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset); - Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset); + Py_ssize_t start_offset = (Py_ssize_t)start_col_byte_offset; + Py_ssize_t end_offset = (Py_ssize_t)end_col_byte_offset; - char offset = truncation; - while (++offset <= start_offset) { - err = PyFile_WriteString(" ", f); - if (err < 0) { - goto done; + if (source_line) { + start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset); + end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset); + } + + const char *primary, *secondary; + primary = secondary = "^"; + + int left_end_offset, right_start_offset; + left_end_offset = right_start_offset = Py_SAFE_DOWNCAST(end_offset, Py_ssize_t, int) - Py_SAFE_DOWNCAST(start_offset, Py_ssize_t, int); + + if (source_line) { + int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset, + &left_end_offset, &right_start_offset); + if (res < 0) { + err = ignore_source_errors(); + if (err < 0) { + goto done; + } + } else if (res > 0) { + primary = "^"; + secondary = "~"; } } - while (++offset <= end_offset + 1) { - err = PyFile_WriteString("^", f); - if (err < 0) { - goto done; + + char offset = truncation; + while (++offset <= end_offset) { + if (offset <= start_offset) { + err = PyFile_WriteString(" ", f); + } else if (offset <= left_end_offset + start_offset) { + err = PyFile_WriteString(secondary, f); + } else if (offset <= right_start_offset + start_offset) { + err = PyFile_WriteString(primary, f); + } else { + err = PyFile_WriteString(secondary, f); } } err = PyFile_WriteString("\n", f); } - else { - PyErr_Clear(); + err = ignore_source_errors(); } done: From d79d365a8b08eb4197502f4fc86a2bc9bf8c2c34 Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Sat, 10 Jul 2021 12:06:35 +0300 Subject: [PATCH 2/7] Port new specializations to traceback.py Co-authored-by: Batuhan Taskaya --- Doc/library/traceback.rst | 4 +-- Lib/test/test_traceback.py | 57 +++++++++++++++++++++++++++++++++++++- Lib/traceback.py | 49 +++++++++++++++++++++++++++++++- 3 files changed, 106 insertions(+), 4 deletions(-) diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst index 1961b9a435bd35..be1f43ea953edb 100644 --- a/Doc/library/traceback.rst +++ b/Doc/library/traceback.rst @@ -473,7 +473,7 @@ The output for the example would look similar to this: ['Traceback (most recent call last):\n', ' File "", line 10, in \n lumberjack()\n ^^^^^^^^^^^^\n', ' File "", line 4, in lumberjack\n bright_side_of_death()\n ^^^^^^^^^^^^^^^^^^^^^^\n', - ' File "", line 7, in bright_side_of_death\n return tuple()[0]\n ^^^^^^^^^^\n', + ' File "", line 7, in bright_side_of_death\n return tuple()[0]\n ~~~~~~~^^^\n', 'IndexError: tuple index out of range\n'] *** extract_tb: [, line 10 in >, @@ -482,7 +482,7 @@ The output for the example would look similar to this: *** format_tb: [' File "", line 10, in \n lumberjack()\n ^^^^^^^^^^^^\n', ' File "", line 4, in lumberjack\n bright_side_of_death()\n ^^^^^^^^^^^^^^^^^^^^^^\n', - ' File "", line 7, in bright_side_of_death\n return tuple()[0]\n ^^^^^^^^^^\n'] + ' File "", line 7, in bright_side_of_death\n return tuple()[0]\n ~~~~~~~^^^\n'] *** tb_lineno: 10 diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 50ebccef82a3fa..3c9916a23d2240 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -406,6 +406,61 @@ def f_with_multiline(): result_lines = self.get_exception(f_with_multiline) self.assertEqual(result_lines, expected_f.splitlines()) + def test_caret_for_binary_operators(self): + def f_with_binary_operator(): + divisor = 20 + return 10 + divisor / 0 + 30 + + lineno_f = f_with_binary_operator.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ^^^^^^^^^^\n' + f' File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n' + ' return 10 + divisor / 0 + 30\n' + ' ~~~~~~~~^~~\n' + ) + result_lines = self.get_exception(f_with_binary_operator) + self.assertEqual(result_lines, expected_error.splitlines()) + + def test_caret_for_binary_operators_two_char(self): + def f_with_binary_operator(): + divisor = 20 + return 10 + divisor // 0 + 30 + + lineno_f = f_with_binary_operator.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ^^^^^^^^^^\n' + f' File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n' + ' return 10 + divisor // 0 + 30\n' + ' ~~~~~~~~^^~~\n' + ) + result_lines = self.get_exception(f_with_binary_operator) + self.assertEqual(result_lines, expected_error.splitlines()) + + def test_caret_for_subscript(self): + def f_with_subscript(): + some_dict = {'x': {'y': None}} + return some_dict['x']['y']['z'] + + lineno_f = f_with_subscript.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ^^^^^^^^^^\n' + f' File "{__file__}", line {lineno_f+2}, in f_with_subscript\n' + " return some_dict['x']['y']['z']\n" + ' ~~~~~~~~~~~~~~~~~~~^^^^^\n' + ) + result_lines = self.get_exception(f_with_subscript) + self.assertEqual(result_lines, expected_error.splitlines()) + + @cpython_only @requires_debug_ranges() @@ -1615,7 +1670,7 @@ def f(): self.assertEqual( output.getvalue().split('\n')[-5:], [' x/0', - ' ^^^', + ' ~^~', ' x = 12', 'ZeroDivisionError: division by zero', '']) diff --git a/Lib/traceback.py b/Lib/traceback.py index 7cb124188aca8a..35f02e827d3fc6 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -494,9 +494,23 @@ def format(self): colno = _byte_offset_to_character_offset(frame._original_line, frame.colno) end_colno = _byte_offset_to_character_offset(frame._original_line, frame.end_colno) + try: + anchors = _extract_caret_anchors_from_line_segment( + frame._original_line[colno - 1:end_colno] + ) + except Exception: + anchors = None + row.append(' ') row.append(' ' * (colno - stripped_characters)) - row.append('^' * (end_colno - colno)) + + if anchors: + row.append('~' * (anchors[0])) + row.append('^' * (anchors[1] - anchors[0])) + row.append('~' * (end_colno - colno - anchors[1])) + else: + row.append('^' * (end_colno - colno)) + row.append('\n') if frame.locals: @@ -520,6 +534,39 @@ def _byte_offset_to_character_offset(str, offset): return len(as_utf8[:offset + 1].decode("utf-8")) +def _extract_caret_anchors_from_line_segment(segment): + import ast + + try: + tree = ast.parse(segment) + except SyntaxError: + return None + + if len(tree.body) != 1: + return None + + statement = tree.body[0] + match statement: + case ast.Expr(expr): + match expr: + case ast.BinOp(): + operator_str = segment[expr.left.end_col_offset:expr.right.col_offset] + operator_offset = len(operator_str) - len(operator_str.lstrip()) + + left_anchor = expr.left.end_col_offset + operator_offset + right_anchor = left_anchor + 1 + if ( + operator_offset + 1 < len(operator_str) + and not operator_str[operator_offset + 1].isspace() + ): + right_anchor += 1 + return left_anchor, right_anchor + case ast.Subscript(): + return expr.value.end_col_offset, expr.slice.end_col_offset + 1 + + return None + + class TracebackException: """An exception ready for rendering. From 26430d41967329edc8f2f09abe1f57b94b261799 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Sat, 10 Jul 2021 20:52:59 +0300 Subject: [PATCH 3/7] pass segmen_str around --- Python/traceback.c | 43 ++++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/Python/traceback.c b/Python/traceback.c index 745376490efcbf..286f24cac65c00 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -9,7 +9,7 @@ #include "pycore_frame.h" // _PyFrame_GetCode() #include "pycore_pyarena.h" // _PyArena_Free() #include "pycore_ast.h" // asdl_seq_* -#include "pycore_compile.h" // asdl_seq_* +#include "pycore_compile.h" // _PyAST_Optimize #include "pycore_parser.h" // _PyParser_ASTFromString #include "../Parser/pegen.h" // _PyPegen_byte_offset_to_character_offset() #include "structmember.h" // PyMemberDef @@ -536,37 +536,26 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f')) static int -extract_anchors_from_expr(PyObject *segment, expr_ty expr, int *left_anchor, int *right_anchor) +extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_anchor, int *right_anchor) { switch (expr->kind) { case BinOp_kind: { - PyObject *operator = PyUnicode_Substring(segment, expr->v.BinOp.left->end_col_offset, - expr->v.BinOp.right->col_offset); - if (!operator) { - return -1; - } - - const char *operator_str = PyUnicode_AsUTF8(operator); - if (!operator_str) { - Py_DECREF(operator); - return -1; - } - - Py_ssize_t i, len = PyUnicode_GET_LENGTH(operator); - for (i = 0; i < len; i++) { - if (IS_WHITESPACE(operator_str[i])) { + expr_ty left = expr->v.BinOp.left; + expr_ty right = expr->v.BinOp.right; + for (int i = left->end_col_offset + 1; i < right->col_offset; i++) { + if (IS_WHITESPACE(segment_str[i])) { continue; } - int index = Py_SAFE_DOWNCAST(i, Py_ssize_t, int); - *left_anchor = expr->v.BinOp.left->end_col_offset + index; - *right_anchor = expr->v.BinOp.left->end_col_offset + index + 1; - if (i + 1 < len && !IS_WHITESPACE(operator_str[i + 1])) { + *left_anchor = i; + *right_anchor = i + 1; + + // Check whether if this a two-character operator (e.g //) + if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) { ++*right_anchor; } break; } - Py_DECREF(operator); return 1; } case Subscript_kind: { @@ -580,11 +569,11 @@ extract_anchors_from_expr(PyObject *segment, expr_ty expr, int *left_anchor, int } static int -extract_anchors_from_stmt(PyObject *segment, stmt_ty statement, int *left_anchor, int *right_anchor) +extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, int *left_anchor, int *right_anchor) { switch (statement->kind) { case Expr_kind: { - return extract_anchors_from_expr(segment, statement->v.Expr.value, left_anchor, right_anchor); + return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor); } default: return 0; @@ -631,7 +620,7 @@ extract_anchors_from_line(PyObject *filename, PyObject *line, assert(module->kind == Module_kind); if (asdl_seq_LEN(module->v.Module.body) == 1) { stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0); - res = extract_anchors_from_stmt(segment, statement, left_anchor, right_anchor); + res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor); } else { res = 0; } @@ -711,8 +700,8 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen const char *primary, *secondary; primary = secondary = "^"; - int left_end_offset, right_start_offset; - left_end_offset = right_start_offset = Py_SAFE_DOWNCAST(end_offset, Py_ssize_t, int) - Py_SAFE_DOWNCAST(start_offset, Py_ssize_t, int); + int left_end_offset = Py_SAFE_DOWNCAST(end_offset, Py_ssize_t, int) - Py_SAFE_DOWNCAST(start_offset, Py_ssize_t, int); + int right_start_offset = left_end_offset; if (source_line) { int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset, From 081738df614f1e533901945aaa4ca0b65c54823d Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 12 Jul 2021 03:57:48 +0100 Subject: [PATCH 4/7] Refactor and simplify the traceback code --- Python/traceback.c | 169 +++++++++++++++++++++++++++------------------ 1 file changed, 100 insertions(+), 69 deletions(-) diff --git a/Python/traceback.c b/Python/traceback.c index 286f24cac65c00..9add00e0866698 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -536,9 +536,10 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f')) static int -extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_anchor, int *right_anchor) +extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor, + char** primary_error_char, char** secondary_error_char) { - switch (expr->kind) { +switch (expr->kind) { case BinOp_kind: { expr_ty left = expr->v.BinOp.left; expr_ty right = expr->v.BinOp.right; @@ -554,6 +555,10 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_ancho if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) { ++*right_anchor; } + + // Set the error characters + *primary_error_char = "~"; + *secondary_error_char = "^"; break; } return 1; @@ -561,6 +566,10 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_ancho case Subscript_kind: { *left_anchor = expr->v.Subscript.value->end_col_offset; *right_anchor = expr->v.Subscript.slice->end_col_offset + 1; + + // Set the error characters + *primary_error_char = "~"; + *secondary_error_char = "^"; return 1; } default: @@ -569,11 +578,13 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_ancho } static int -extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, int *left_anchor, int *right_anchor) +extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor, + char** primary_error_char, char** secondary_error_char) { switch (statement->kind) { case Expr_kind: { - return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor); + return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor, + primary_error_char, secondary_error_char); } default: return 0; @@ -583,7 +594,8 @@ extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, int *left_ static int extract_anchors_from_line(PyObject *filename, PyObject *line, Py_ssize_t start_offset, Py_ssize_t end_offset, - int *left_anchor, int *right_anchor) + Py_ssize_t *left_anchor, Py_ssize_t *right_anchor, + char** primary_error_char, char** secondary_error_char) { int res = -1; PyArena *arena = NULL; @@ -620,12 +632,17 @@ extract_anchors_from_line(PyObject *filename, PyObject *line, assert(module->kind == Module_kind); if (asdl_seq_LEN(module->v.Module.body) == 1) { stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0); - res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor); + res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor, + primary_error_char, secondary_error_char); } else { res = 0; } done: + if (res > 0) { + *left_anchor += start_offset; + *right_anchor += start_offset; + } Py_XDECREF(segment); if (arena) { _PyArena_Free(arena); @@ -646,6 +663,25 @@ ignore_source_errors(void) { return 0; } +static inline int +print_error_location_carets(PyObject *f, int offset, Py_ssize_t start_offset, Py_ssize_t end_offset, + Py_ssize_t right_start_offset, Py_ssize_t left_end_offset, + const char *primary, const char *secondary) { + int err = 0; + int special_chars = (left_end_offset != -1 || right_start_offset != -1); + while (++offset <= end_offset) { + if (offset <= start_offset || offset > end_offset) { + err = PyFile_WriteString(" ", f); + } else if (special_chars && left_end_offset < offset && offset <= right_start_offset) { + err = PyFile_WriteString(secondary, f); + } else { + err = PyFile_WriteString(primary, f); + } + } + err = PyFile_WriteString("\n", f); + return err; +} + static int tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno, PyFrameObject *frame, PyObject *name) @@ -665,76 +701,71 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen return err; int truncation = _TRACEBACK_SOURCE_LINE_INDENT; PyObject* source_line = NULL; - /* ignore errors since we can't report them, can we? */ - if (!_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT, - &truncation, &source_line)) { - int code_offset = tb->tb_lasti; - PyCodeObject* code = _PyFrame_GetCode(frame); - - int start_line; - int end_line; - int start_col_byte_offset; - int end_col_byte_offset; - if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset, - &end_line, &end_col_byte_offset)) { - goto done; - } - if (start_line != end_line) { - goto done; - } - if (start_col_byte_offset < 0 || end_col_byte_offset < 0) { - goto done; - } - - // Convert the utf-8 byte offset to the actual character offset so we - // print the right number of carets. - Py_ssize_t start_offset = (Py_ssize_t)start_col_byte_offset; - Py_ssize_t end_offset = (Py_ssize_t)end_col_byte_offset; - - if (source_line) { - start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset); - end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset); - } + if (_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT, + &truncation, &source_line) != 0) { + /* ignore errors since we can't report them, can we? */ + err = ignore_source_errors(); + goto done; + } - const char *primary, *secondary; - primary = secondary = "^"; + int code_offset = tb->tb_lasti; + PyCodeObject* code = _PyFrame_GetCode(frame); - int left_end_offset = Py_SAFE_DOWNCAST(end_offset, Py_ssize_t, int) - Py_SAFE_DOWNCAST(start_offset, Py_ssize_t, int); - int right_start_offset = left_end_offset; + int start_line; + int end_line; + int start_col_byte_offset; + int end_col_byte_offset; + if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset, + &end_line, &end_col_byte_offset)) { + goto done; + } + if (start_line != end_line) { + goto done; + } - if (source_line) { - int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset, - &left_end_offset, &right_start_offset); - if (res < 0) { - err = ignore_source_errors(); - if (err < 0) { - goto done; - } - } else if (res > 0) { - primary = "^"; - secondary = "~"; - } - } + if (start_col_byte_offset < 0 || end_col_byte_offset < 0) { + goto done; + } - char offset = truncation; - while (++offset <= end_offset) { - if (offset <= start_offset) { - err = PyFile_WriteString(" ", f); - } else if (offset <= left_end_offset + start_offset) { - err = PyFile_WriteString(secondary, f); - } else if (offset <= right_start_offset + start_offset) { - err = PyFile_WriteString(primary, f); - } else { - err = PyFile_WriteString(secondary, f); - } + // When displaying errors, we will use the following generic structure: + // + // ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE + // ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~ + // | |-> left_end_offset | |-> left_offset + // |-> start_offset |-> right_start_offset + // + // In general we will only have (start_offset, end_offset) but we can gather more information + // by analyzing the AST of the text between *start_offset* and *end_offset*. If this succeeds + // we could get *left_end_offset* and *right_start_offset* and some selection of characters for + // the different ranges (primary_error_char and secondary_error_char). If we cannot obtain the + // AST information or we cannot identify special ranges within it, then left_end_offset and + // right_end_offset will be set to -1. + + // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets. + Py_ssize_t start_offset = (Py_ssize_t)start_col_byte_offset; + Py_ssize_t end_offset = (Py_ssize_t)end_col_byte_offset; + Py_ssize_t left_end_offset = -1; + Py_ssize_t right_start_offset = -1; + + char *primary_error_char = "^"; + char *secondary_error_char = primary_error_char; + + if (source_line) { + start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset); + end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset); + int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset, + &left_end_offset, &right_start_offset, + &primary_error_char, &secondary_error_char); + if (res < 0 && ignore_source_errors() < 0) { + goto done; } - err = PyFile_WriteString("\n", f); } - else { - err = ignore_source_errors(); - } - + + err = print_error_location_carets(f, truncation, start_offset, end_offset, + right_start_offset, left_end_offset, + primary_error_char, secondary_error_char); + done: Py_XDECREF(source_line); return err; From 045c49184b082ed7476bd72f454b910d435fc278 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Mon, 12 Jul 2021 20:14:40 +0300 Subject: [PATCH 5/7] make amendments --- Lib/test/test_traceback.py | 27 ++++++++++++++++++++++++++- Lib/traceback.py | 21 ++++++++++++++++----- Python/traceback.c | 21 +++++++++------------ 3 files changed, 51 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 3c9916a23d2240..82dce41fa8c13e 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -6,15 +6,18 @@ import sys import inspect import unittest +import tempfile import re from test import support from test.support import (Error, captured_output, cpython_only, ALWAYS_EQ, requires_debug_ranges, has_no_debug_ranges) from test.support.os_helper import TESTFN, unlink from test.support.script_helper import assert_python_ok, assert_python_failure -import textwrap +import os +import textwrap import traceback +from functools import partial test_code = namedtuple('code', ['co_filename', 'co_name']) @@ -460,7 +463,29 @@ def f_with_subscript(): result_lines = self.get_exception(f_with_subscript) self.assertEqual(result_lines, expected_error.splitlines()) + def test_traceback_specialization_with_syntax_error(self): + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as file: + bytecode = compile("1 / 0 / 1 / 2\n", file.name, "exec") + + # make the file invalid + file.write("1 $ 0 / 1 / 2\n") + file.flush() + func = partial(exec, bytecode) + result_lines = self.get_exception(func) + os.unlink(file.name) + + lineno_f = bytecode.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ^^^^^^^^^^\n' + f' File "{file.name}", line {lineno_f}, in \n' + " 1 $ 0 / 1 / 2\n" + ' ^^^^^\n' + ) + self.assertEqual(result_lines, expected_error.splitlines()) @cpython_only @requires_debug_ranges() diff --git a/Lib/traceback.py b/Lib/traceback.py index 35f02e827d3fc6..ec5e20d431feb8 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -505,9 +505,9 @@ def format(self): row.append(' ' * (colno - stripped_characters)) if anchors: - row.append('~' * (anchors[0])) - row.append('^' * (anchors[1] - anchors[0])) - row.append('~' * (end_colno - colno - anchors[1])) + row.append(anchors.primary_char * (anchors.left_end_offset)) + row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset)) + row.append(anchors.primary_char * (end_colno - colno - anchors.right_start_offset)) else: row.append('^' * (end_colno - colno)) @@ -534,6 +534,17 @@ def _byte_offset_to_character_offset(str, offset): return len(as_utf8[:offset + 1].decode("utf-8")) +_Anchors = collections.namedtuple( + "_Anchors", + [ + "left_end_offset", + "right_start_offset", + "primary_char", + "secondary_char", + ], + defaults=["~", "^"] +) + def _extract_caret_anchors_from_line_segment(segment): import ast @@ -560,9 +571,9 @@ def _extract_caret_anchors_from_line_segment(segment): and not operator_str[operator_offset + 1].isspace() ): right_anchor += 1 - return left_anchor, right_anchor + return _Anchors(left_anchor, right_anchor) case ast.Subscript(): - return expr.value.end_col_offset, expr.slice.end_col_offset + 1 + return _Anchors(expr.value.end_col_offset, expr.slice.end_col_offset + 1) return None diff --git a/Python/traceback.c b/Python/traceback.c index 9add00e0866698..199d3ea7596bf8 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -539,7 +539,7 @@ static int extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor, char** primary_error_char, char** secondary_error_char) { -switch (expr->kind) { + switch (expr->kind) { case BinOp_kind: { expr_ty left = expr->v.BinOp.left; expr_ty right = expr->v.BinOp.right; @@ -743,23 +743,20 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen // right_end_offset will be set to -1. // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets. - Py_ssize_t start_offset = (Py_ssize_t)start_col_byte_offset; - Py_ssize_t end_offset = (Py_ssize_t)end_col_byte_offset; + assert(source_line); + Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset); + Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset); Py_ssize_t left_end_offset = -1; Py_ssize_t right_start_offset = -1; char *primary_error_char = "^"; char *secondary_error_char = primary_error_char; - if (source_line) { - start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset); - end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset); - int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset, - &left_end_offset, &right_start_offset, - &primary_error_char, &secondary_error_char); - if (res < 0 && ignore_source_errors() < 0) { - goto done; - } + int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset, + &left_end_offset, &right_start_offset, + &primary_error_char, &secondary_error_char); + if (res < 0 && ignore_source_errors() < 0) { + goto done; } err = print_error_location_carets(f, truncation, start_offset, end_offset, From 012ae63a7863b2c2cc07ce7204d46dab581d50e5 Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Mon, 12 Jul 2021 14:14:51 -0400 Subject: [PATCH 6/7] Use TESTFN instead of NamedTemporaryFile --- Lib/test/test_traceback.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 82dce41fa8c13e..0c1af090544f13 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -464,16 +464,15 @@ def f_with_subscript(): self.assertEqual(result_lines, expected_error.splitlines()) def test_traceback_specialization_with_syntax_error(self): - with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as file: - bytecode = compile("1 / 0 / 1 / 2\n", file.name, "exec") + bytecode = compile("1 / 0 / 1 / 2\n", TESTFN, "exec") - # make the file invalid + with open(TESTFN, "w") as file: + # make the file's contents invalid file.write("1 $ 0 / 1 / 2\n") - file.flush() + self.addCleanup(unlink, TESTFN) func = partial(exec, bytecode) result_lines = self.get_exception(func) - os.unlink(file.name) lineno_f = bytecode.co_firstlineno expected_error = ( @@ -481,7 +480,7 @@ def test_traceback_specialization_with_syntax_error(self): f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' ' ^^^^^^^^^^\n' - f' File "{file.name}", line {lineno_f}, in \n' + f' File "{TESTFN}", line {lineno_f}, in \n' " 1 $ 0 / 1 / 2\n" ' ^^^^^\n' ) From 825e3b09c70ae2716cf85a31218cf13a1ec91288 Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Mon, 12 Jul 2021 14:15:50 -0400 Subject: [PATCH 7/7] Remove tempfile import --- Lib/test/test_traceback.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 0c1af090544f13..8baf38c1afd5d6 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -6,7 +6,6 @@ import sys import inspect import unittest -import tempfile import re from test import support from test.support import (Error, captured_output, cpython_only, ALWAYS_EQ,