From 8c9e56046c21554302148608a93cd3ac60cd1bdc Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Sun, 13 Jul 2025 21:40:59 +0530 Subject: [PATCH 01/12] Support new opcodes --- ASTNode.h | 31 ++++- ASTree.cpp | 125 ++++++++++++++++-- FastStack.h | 19 +++ pyc_object.h | 27 +++- ...t_extend.3.9.pyc => list_extend_1.3.9.pyc} | Bin tests/compiled/list_extend_2.3.12.pyc | Bin 0 -> 215 bytes tests/compiled/test_unpack.3.12.pyc | Bin 0 -> 410 bytes .../{list_extend.py => list_extend_1.py} | 0 tests/input/list_extend_2.py | 2 + tests/input/test_unpack.py | 6 + .../{list_extend.txt => list_extend_1.txt} | 0 tests/tokenized/list_extend_2.txt | 3 + tests/tokenized/test_unpack.txt | 6 + 13 files changed, 197 insertions(+), 22 deletions(-) rename tests/compiled/{list_extend.3.9.pyc => list_extend_1.3.9.pyc} (100%) create mode 100644 tests/compiled/list_extend_2.3.12.pyc create mode 100644 tests/compiled/test_unpack.3.12.pyc rename tests/input/{list_extend.py => list_extend_1.py} (100%) create mode 100644 tests/input/list_extend_2.py create mode 100644 tests/input/test_unpack.py rename tests/tokenized/{list_extend.txt => list_extend_1.txt} (100%) create mode 100644 tests/tokenized/list_extend_2.txt create mode 100644 tests/tokenized/test_unpack.txt diff --git a/ASTNode.h b/ASTNode.h index 98760dbf5..1b2e643e7 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -18,13 +18,13 @@ class ASTNode { NODE_COMPREHENSION, NODE_LOADBUILDCLASS, NODE_AWAITABLE, NODE_FORMATTEDVALUE, NODE_JOINEDSTR, NODE_CONST_MAP, NODE_ANNOTATED_VAR, NODE_CHAINSTORE, NODE_TERNARY, - NODE_KW_NAMES_MAP, + NODE_KW_NAMES_MAP, NODE_CALL_INTRINSIC_1, NODE_CALL_INTRINSIC_2, // Empty node types NODE_LOCALS, }; - ASTNode(int type = NODE_INVALID) : m_refs(), m_type(type), m_processed() { } + ASTNode(int type = NODE_INVALID, bool unpacked = false) : m_refs(), m_type(type), m_processed(), m_unpacked(unpacked) { } virtual ~ASTNode() { } int type() const { return internalGetType(this); } @@ -32,10 +32,15 @@ class ASTNode { bool processed() const { return m_processed; } void setProcessed() { m_processed = true; } + bool unpacked() const { return m_unpacked; } + void setUnpacked() { m_unpacked = true; } + private: int m_refs; int m_type; bool m_processed; + // unpack this node into constituent values + bool m_unpacked; // Hack to make clang happy :( static int internalGetType(const ASTNode *node) @@ -757,4 +762,26 @@ class ASTTernary : public ASTNode PycRef m_else_expr; }; +class ASTCallIntrinsic1: public ASTNode +{ +public: + enum Function { + INTRINSIC_1_INVALID, INTRINSIC_PRINT, INTRINSIC_IMPORT_STAR, + INTRINSIC_STOPITERATION_ERROR, INTRINSIC_ASYNC_GEN_WRAP, + INTRINSIC_UNARY_POSITIVE, INTRINSIC_LIST_TO_TUPLE, INTRINSIC_TYPEVAR, + INTRINSIC_PARAMSPEC, INTRINSIC_TYPEVARTUPLE, + INTRINSIC_SUBSCRIPT_GENERIC, INTRINSIC_TYPEALIAS, + }; +}; + +class ASTCallIntrinsic2: public ASTNode +{ +public: + enum Function { + INTRINSIC_2_INVALID, INTRINSIC_PREP_RERAISE_STAR, + INTRINSIC_TYPEVAR_WITH_BOUND, INTRINSIC_TYPEVAR_WITH_CONSTRAINTS, + INTRINSIC_SET_FUNCTION_TYPE_PARAMS, INTRINSIC_SET_TYPEPARAM_DEFAULT, + }; +}; + #endif diff --git a/ASTree.cpp b/ASTree.cpp index 354292151..472d04875 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -5,6 +5,7 @@ #include "FastStack.h" #include "pyc_numeric.h" #include "bytecode.h" +#include // This must be a triple quote (''' or """), to handle interpolated string literals containing the opposite quote style. // E.g. f'''{"interpolated "123' literal"}''' -> valid. @@ -1443,29 +1444,46 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) break; case Pyc::LIST_EXTEND_A: { + if (operand != 1) { + fprintf(stderr, "LIST_EXTEND operand list is not at the top of the stack\n"); + break; + } + PycRef rhs = stack.top(); stack.pop(); PycRef lhs = stack.top().cast(); stack.pop(); - if (rhs.type() != ASTNode::NODE_OBJECT) { - fprintf(stderr, "Unsupported argument found for LIST_EXTEND\n"); - break; - } + if (rhs.type() == ASTNode::NODE_OBJECT) { - // I've only ever seen this be a SMALL_TUPLE, but let's be careful... - PycRef obj = rhs.cast()->object(); - if (obj->type() != PycObject::TYPE_TUPLE && obj->type() != PycObject::TYPE_SMALL_TUPLE) { - fprintf(stderr, "Unsupported argument type found for LIST_EXTEND\n"); - break; - } + // I've only ever seen this be a SMALL_TUPLE, but let's be careful... + PycRef obj = rhs.cast()->object(); + if (obj->type() != PycObject::TYPE_TUPLE && obj->type() != PycObject::TYPE_SMALL_TUPLE) { + fprintf(stderr, "Unsupported argument type found for LIST_EXTEND\n"); + break; + } - ASTList::value_t result = lhs->values(); - for (const auto& it : obj.cast()->values()) { - result.push_back(new ASTObject(it)); + ASTList::value_t result = lhs->values(); + for (const auto& it : obj.cast()->values()) { + result.push_back(new ASTObject(it)); + } + + stack.push(new ASTList(result)); } + else if (rhs.type() == ASTNode::NODE_NAME) { + ASTList::value_t result = lhs->values(); + + // rhs is a variable, so to extend the list + // we need to unpack rhs + PycRef unpacked_ref = rhs; + unpacked_ref.setUnpacked(); - stack.push(new ASTList(result)); + result.push_back(unpacked_ref); + stack.push(new ASTList(result)); + } + else { + fprintf(stderr, "Unsupported argument %i found for LIST_EXTEND\n", rhs.type()); + } } break; case Pyc::LOAD_ATTR_A: @@ -1515,6 +1533,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack.push(new ASTName(code->getCellVar(mod, operand))); break; case Pyc::LOAD_FAST_A: + case Pyc::LOAD_FAST_CHECK_A: if (mod->verCompare(1, 3) < 0) stack.push(new ASTName(code->getName(operand))); else @@ -2577,6 +2596,76 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack.push(value); } break; + case Pyc::CALL_INTRINSIC_1_A: + { + PycRef arg = stack.top(); + stack.pop(); + + if (operand != ASTCallIntrinsic1::INTRINSIC_LIST_TO_TUPLE) { + fprintf(stderr, "Unimplemented function %i", operand); + break; + } + + if (arg.type() != ASTNode::NODE_LIST) { + fprintf(stderr, "Unexpected argument type %i\n", arg.type()); + break; + } + + PycRef list = arg.cast(); + ASTTuple::value_t values; + for (PycRef val : list->values()) { + values.push_back(val); + } + stack.push(new ASTTuple(values)); + } + break; + case Pyc::CALL_FUNCTION_EX_A: + { + int has_kwmap = operand & 1; + ASTCall::kwparam_t kwparamList; + ASTCall::pparam_t pparamList; + + // callable, iterable object & kwmap object (if present) + + if (has_kwmap) { + PycRef object_or_map = stack.top(); + if (object_or_map.type() == ASTNode::NODE_KW_NAMES_MAP) { + stack.pop(); + PycRef kwparams_map = object_or_map.cast(); + for (ASTKwNamesMap::map_t::const_iterator it = kwparams_map->values().begin(); it != kwparams_map->values().end(); it++) { + kwparamList.push_front(std::make_pair(it->first, it->second)); + } + } + else { + fprintf(stderr, "Unexpected object type %i\n", object_or_map.type()); + } + } + + PycRef iterable = stack.top(); + stack.pop(); + + if (iterable.type() == ASTNode::NODE_LIST) { + PycRef list = iterable.cast(); + for (PycRef n: list->values()) { + pparamList.push_back(n); + } + } + else if (iterable.type() == ASTNode::NODE_TUPLE) { + PycRef tuple = iterable.cast(); + for (PycRef n: tuple->values()) { + pparamList.push_back(n); + } + } + else { + fprintf(stderr, "Unsupported iterable type %i\n", iterable.type()); + } + + PycRef func = stack.top(); + stack.pop(); + + stack.push(new ASTCall(func, pparamList, kwparamList)); + } + break; default: fprintf(stderr, "Unsupported opcode: %s (%d)\n", Pyc::OpcodeName(opcode), opcode); cleanBuild = false; @@ -2773,6 +2862,7 @@ void print_formatted_value(PycRef formatted_value, PycModule* pyc_output << "}"; } +// TODO: Handle m_unpack for node correctly here. void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) { if (node == NULL) { @@ -2891,6 +2981,10 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) break; case ASTNode::NODE_LIST: { + if (node.isUnpacked()) { + pyc_output << "*"; + } + pyc_output << "["; bool first = true; cur_indent++; @@ -2984,6 +3078,9 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) } break; case ASTNode::NODE_NAME: + if (node.isUnpacked()) { + pyc_output << "*"; + } pyc_output << node.cast()->name()->value(); break; case ASTNode::NODE_NODELIST: diff --git a/FastStack.h b/FastStack.h index b91ec71de..624cf0a98 100644 --- a/FastStack.h +++ b/FastStack.h @@ -61,6 +61,25 @@ class FastStack { return m_ptr == -1; } + void debug_print(PycModule* mod, std::ostream& pyc_output) + { + pyc_output << "---- STACK CONTENTS ----\n"; + if (empty()) { + pyc_output << "empty stack\n"; + } + else { + for (int i = m_ptr; i >= 0; i--) { + print_src(m_stack[i], mod, pyc_output); + if (i == m_ptr) { + pyc_output << " <- STACK TOP"; + } + pyc_output << "\n"; + } + } + pyc_output << "------------------------\n"; + } + + private: std::vector> m_stack; int m_ptr; diff --git a/pyc_object.h b/pyc_object.h index 085944496..00140ec3b 100644 --- a/pyc_object.h +++ b/pyc_object.h @@ -6,21 +6,21 @@ template class PycRef { public: - PycRef() noexcept : m_obj() { } + PycRef() noexcept : m_obj(), m_unpack(false) { } - PycRef(_Obj* obj) noexcept : m_obj(obj) + PycRef(_Obj* obj) noexcept : m_obj(obj), m_unpack(false) { if (m_obj) m_obj->addRef(); } - PycRef(const PycRef<_Obj>& obj) noexcept : m_obj(obj.m_obj) + PycRef(const PycRef<_Obj>& obj) noexcept : m_obj(obj.m_obj), m_unpack(obj.m_unpack) { if (m_obj) m_obj->addRef(); } - PycRef(PycRef<_Obj>&& obj) noexcept : m_obj(obj.m_obj) + PycRef(PycRef<_Obj>&& obj) noexcept : m_obj(obj.m_obj), m_unpack(obj.m_unpack) { obj.m_obj = nullptr; } @@ -31,6 +31,8 @@ class PycRef { m_obj->delRef(); } + // Most operators should deal with m_unpack but we leave it as is for now + PycRef<_Obj>& operator=(_Obj* obj) { if (obj) @@ -75,16 +77,29 @@ class PycRef { template PycRef<_Cast> cast() const { - _Cast* result = dynamic_cast<_Cast*>(m_obj); - if (!result) + _Cast* casted_obj = dynamic_cast<_Cast*>(m_obj); + if (!casted_obj) throw std::bad_cast(); + + PycRef<_Cast> result = casted_obj; + if (m_unpack) { + result.setUnpacked(); + } return result; } bool isIdent(const _Obj* obj) const { return m_obj == obj; } + bool isUnpacked() const { return m_unpack; } + void setUnpacked() { m_unpack = true; } + private: _Obj* m_obj; + + // References to an object can be either packed or unpacked + // Usually unpacked references will be used with variables but + // they may arise in other places as well. + bool m_unpack; }; diff --git a/tests/compiled/list_extend.3.9.pyc b/tests/compiled/list_extend_1.3.9.pyc similarity index 100% rename from tests/compiled/list_extend.3.9.pyc rename to tests/compiled/list_extend_1.3.9.pyc diff --git a/tests/compiled/list_extend_2.3.12.pyc b/tests/compiled/list_extend_2.3.12.pyc new file mode 100644 index 0000000000000000000000000000000000000000..25c333a8e39c9a975e05135b1c8c92f7d78494c0 GIT binary patch literal 215 zcmX@j%ge<81iE2`88SfnF^B^Lj8MjBkdo;PDGV(PQ4E!gnoP+s8IS@dAZ7+)sH${^ z8iqxT!3>&=ek*~*EykQ;kZOfrT*ZmG1v#lkdIgoYnA1~Bir9d{AgN+*Akn~ZgPXr2 zu*0Om`!kTs;HSw5R!{^ox`-V_umXu&95%W6DWy57c10XOE?7Irbsv}+85ut_F)|8( G)dBzKGX+88um7f;fK3P%*F!JCI}s;?F)nVmd6D7+|JX z0mZ6-!s(1P42u}ovh^_pGiWmV-D0%4#ptQYe2cBPq^LBxFJlG7MB$3XXX`@mVnvurFkH=dIgoYSjtQClZv>3Ni{zClt>xo>RQQaCX^DtBG|NxV2U=UgFlez@qgTsEXkw4~PKi(qt-P15#j9Rx%WU z#EU>-dyB&+H$SB`C)KV<0LTTor&tO|d|+l|WW3G5`-z2-QTrns10!ogQioteSSt@$ FF97PBQvUz| literal 0 HcmV?d00001 diff --git a/tests/input/list_extend.py b/tests/input/list_extend_1.py similarity index 100% rename from tests/input/list_extend.py rename to tests/input/list_extend_1.py diff --git a/tests/input/list_extend_2.py b/tests/input/list_extend_2.py new file mode 100644 index 000000000..1473f17de --- /dev/null +++ b/tests/input/list_extend_2.py @@ -0,0 +1,2 @@ +def get(l): + return [*l] diff --git a/tests/input/test_unpack.py b/tests/input/test_unpack.py new file mode 100644 index 000000000..fdf2ebf9e --- /dev/null +++ b/tests/input/test_unpack.py @@ -0,0 +1,6 @@ +import struct + +def wtob(w): + return struct.pack('<'+'I'*len(w), *w) + +wtob([12,3]) diff --git a/tests/tokenized/list_extend.txt b/tests/tokenized/list_extend_1.txt similarity index 100% rename from tests/tokenized/list_extend.txt rename to tests/tokenized/list_extend_1.txt diff --git a/tests/tokenized/list_extend_2.txt b/tests/tokenized/list_extend_2.txt new file mode 100644 index 000000000..31a437e0c --- /dev/null +++ b/tests/tokenized/list_extend_2.txt @@ -0,0 +1,3 @@ +def get ( l ) : + +return [ * l ] diff --git a/tests/tokenized/test_unpack.txt b/tests/tokenized/test_unpack.txt new file mode 100644 index 000000000..16c7af1f4 --- /dev/null +++ b/tests/tokenized/test_unpack.txt @@ -0,0 +1,6 @@ +import struct +def wtob ( w ) : + +return struct . pack ( '<' + 'I' * len ( w ) , * w ) + +wtob ( [ 12 , 3 ] ) From 6568907236cc51be705cf824d8743d5d4a2837f2 Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Mon, 14 Jul 2025 11:51:40 +0530 Subject: [PATCH 02/12] Resolve comments on PR --- ASTNode.h | 7 +------ ASTree.cpp | 3 ++- pyc_object.h | 20 +++++++++++--------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/ASTNode.h b/ASTNode.h index 1b2e643e7..0f7ebc28b 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -24,7 +24,7 @@ class ASTNode { NODE_LOCALS, }; - ASTNode(int type = NODE_INVALID, bool unpacked = false) : m_refs(), m_type(type), m_processed(), m_unpacked(unpacked) { } + ASTNode(int type = NODE_INVALID) : m_refs(), m_type(type), m_processed() { } virtual ~ASTNode() { } int type() const { return internalGetType(this); } @@ -32,15 +32,10 @@ class ASTNode { bool processed() const { return m_processed; } void setProcessed() { m_processed = true; } - bool unpacked() const { return m_unpacked; } - void setUnpacked() { m_unpacked = true; } - private: int m_refs; int m_type; bool m_processed; - // unpack this node into constituent values - bool m_unpacked; // Hack to make clang happy :( static int internalGetType(const ASTNode *node) diff --git a/ASTree.cpp b/ASTree.cpp index 472d04875..a0946c683 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -1476,7 +1476,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) // rhs is a variable, so to extend the list // we need to unpack rhs PycRef unpacked_ref = rhs; - unpacked_ref.setUnpacked(); + unpacked_ref.setUnpacked(true); result.push_back(unpacked_ref); stack.push(new ASTList(result)); @@ -2644,6 +2644,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) PycRef iterable = stack.top(); stack.pop(); + // Not sure how to combine these two conditions if (iterable.type() == ASTNode::NODE_LIST) { PycRef list = iterable.cast(); for (PycRef n: list->values()) { diff --git a/pyc_object.h b/pyc_object.h index 00140ec3b..56baad842 100644 --- a/pyc_object.h +++ b/pyc_object.h @@ -31,8 +31,6 @@ class PycRef { m_obj->delRef(); } - // Most operators should deal with m_unpack but we leave it as is for now - PycRef<_Obj>& operator=(_Obj* obj) { if (obj) @@ -40,6 +38,7 @@ class PycRef { if (m_obj) m_obj->delRef(); m_obj = obj; + m_unpack = false; return *this; } @@ -50,16 +49,20 @@ class PycRef { if (m_obj) m_obj->delRef(); m_obj = obj.m_obj; + m_unpack = obj.m_unpack; return *this; } PycRef<_Obj>& operator=(PycRef<_Obj>&& obj) noexcept { m_obj = obj.m_obj; + m_unpack = obj.m_unpack; obj.m_obj = nullptr; + obj.m_unpack = false; return *this; } + // TODO: Handle m_unpack for remaining operators bool operator==(_Obj* obj) const { return m_obj == obj; } bool operator==(const PycRef<_Obj>& obj) const { return m_obj == obj.m_obj; } bool operator!=(_Obj* obj) const { return m_obj != obj; } @@ -82,23 +85,22 @@ class PycRef { throw std::bad_cast(); PycRef<_Cast> result = casted_obj; - if (m_unpack) { - result.setUnpacked(); - } + result.setUnpacked(m_unpack); + return result; } bool isIdent(const _Obj* obj) const { return m_obj == obj; } bool isUnpacked() const { return m_unpack; } - void setUnpacked() { m_unpack = true; } + void setUnpacked(bool unpack) { m_unpack = unpack; } private: _Obj* m_obj; - // References to an object can be either packed or unpacked - // Usually unpacked references will be used with variables but - // they may arise in other places as well. + // References to an object can be either packed or unpacked. + // Usually unpacked references will be used with variables + // or lists but they may arise in other places as well. bool m_unpack; }; From 271e14f46c1824b924f73fda49e82404a69120e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elo=C3=AFse=20Brocas?= Date: Fri, 12 Dec 2025 17:48:28 +0100 Subject: [PATCH 03/12] add LIST_TO_TUPLE operand support --- ASTree.cpp | 17 +++++++++++++++++ tests/compiled/list_to_tuple.3.10.pyc | Bin 0 -> 170 bytes tests/input/list_to_tuple.py | 2 ++ tests/tokenized/list_to_tuple.txt | 2 ++ 4 files changed, 21 insertions(+) create mode 100644 tests/compiled/list_to_tuple.3.10.pyc create mode 100644 tests/input/list_to_tuple.py create mode 100644 tests/tokenized/list_to_tuple.txt diff --git a/ASTree.cpp b/ASTree.cpp index 08d352c4d..6f194df2c 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -1491,6 +1491,23 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } } break; + case Pyc::LIST_TO_TUPLE: + { + if (stack.top().type() != ASTNode::NODE_LIST){ + fprintf(stderr, "Unexpected argument type %i\n", stack.top().type()); + break; + } + + PycRef list = stack.top().cast(); + stack.pop(); + ASTTuple::value_t values; + for (PycRef val : list->values()) + { + values.push_back(val); + } + stack.push(new ASTTuple(values)); + } + break; case Pyc::LOAD_ATTR_A: { PycRef name = stack.top(); diff --git a/tests/compiled/list_to_tuple.3.10.pyc b/tests/compiled/list_to_tuple.3.10.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e30632239925ba92b463101bcc672ee4e6d95ef GIT binary patch literal 170 zcmd1j<>g`k0x?&cOf4Y&7{oyaOhAqU5EqL9iFAe(hDD4~Kspsj2Qfr3rZ5IGXfkOs zzhneTzXU0M$qXd?G?}8GK)*1IDm|j(t@1Sl?+8JKuIw1OF}<0ub{M~SRbS& fz9b)_M6aOo7Kcr4eoARhsvXG8Vvr6F20lgrlGh`H literal 0 HcmV?d00001 diff --git a/tests/input/list_to_tuple.py b/tests/input/list_to_tuple.py new file mode 100644 index 000000000..f37982813 --- /dev/null +++ b/tests/input/list_to_tuple.py @@ -0,0 +1,2 @@ +my_list = [1, 2, 3] +my_tuple = (*my_list,) \ No newline at end of file diff --git a/tests/tokenized/list_to_tuple.txt b/tests/tokenized/list_to_tuple.txt new file mode 100644 index 000000000..e42586e4c --- /dev/null +++ b/tests/tokenized/list_to_tuple.txt @@ -0,0 +1,2 @@ +my_list = [ 1 , 2 , 3 ] +my_tuple = ( * my_list , ) From b6349bea681956cc4b052a4cfeff595c12c6b2b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elo=C3=AFse=20Brocas?= Date: Fri, 12 Dec 2025 15:02:18 +0100 Subject: [PATCH 04/12] add support of dict comprehension --- ASTNode.h | 30 ++++++++++++++++++++++++++++-- ASTree.cpp | 21 +++++++++++++++++---- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/ASTNode.h b/ASTNode.h index 0f7ebc28b..941c95be7 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -644,9 +644,15 @@ class ASTComprehension : public ASTNode { public: typedef std::list> generator_t; - ASTComprehension(PycRef result) - : ASTNode(NODE_COMPREHENSION), m_result(std::move(result)) { } + enum CompType + { + COMP_LIST, COMP_DICT + }; + ASTComprehension(CompType comptype, PycRef result) + : ASTNode(NODE_COMPREHENSION), m_comptype(comptype), m_result(std::move(result)) {} + + CompType comptype() const { return m_comptype; } PycRef result() const { return m_result; } generator_t generators() const { return m_generators; } @@ -655,11 +661,31 @@ class ASTComprehension : public ASTNode { } private: + CompType m_comptype; PycRef m_result; generator_t m_generators; }; +class ASTListComprehension : public ASTComprehension +{ +public: + ASTListComprehension(PycRef result) + : ASTComprehension(COMP_LIST, std::move(result)) {} +}; + +class ASTDictComprehension : public ASTComprehension +{ +public: + ASTDictComprehension(PycRef key, PycRef value) + : ASTComprehension(COMP_DICT, std::move(value)), m_key(std::move(key)) {} + + PycRef key() const { return m_key; } + +private: + PycRef m_key; +}; + class ASTLoadBuildClass : public ASTNode { public: ASTLoadBuildClass(PycRef obj) diff --git a/ASTree.cpp b/ASTree.cpp index 6f194df2c..3811da84e 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -916,7 +916,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) if (mod->verCompare(3, 10) >= 0) end *= sizeof(uint16_t); // // BPO-27129 end += pos; - comprehension = strcmp(code->name()->value(), "") == 0; + comprehension = strcmp(code->name()->value(), "") == 0 || strcmp(code->name()->value(), "") == 0; } else { PycRef top = blocks.top(); end = top->end(); // block end position from SETUP_LOOP @@ -1779,7 +1779,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) auto& pparams = value.cast()->pparams(); if (!pparams.empty()) { PycRef res = pparams.front(); - stack.push(new ASTComprehension(res)); + stack.push(new ASTListComprehension(res)); } } } @@ -3056,7 +3056,13 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) { PycRef comp = node.cast(); - pyc_output << "[ "; + if (comp->comptype() == ASTComprehension::COMP_DICT) { + pyc_output << "{ "; + print_src(comp.cast()->key(), mod, pyc_output); + pyc_output << ": "; + } else { + pyc_output << "[ "; + } print_src(comp->result(), mod, pyc_output); for (const auto& gen : comp->generators()) { @@ -3069,7 +3075,14 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) print_src(gen->condition(), mod, pyc_output); } } - pyc_output << " ]"; + if (comp->comptype() == ASTComprehension::COMP_DICT) + { + pyc_output << "} "; + } + else + { + pyc_output << "] "; + } } break; case ASTNode::NODE_MAP: From b965c94ca61e5bf99ac429f7fea85af9ce7d6e64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elo=C3=AFse=20Brocas?= Date: Fri, 12 Dec 2025 15:03:44 +0100 Subject: [PATCH 05/12] add MAP_ADD operand support --- ASTree.cpp | 41 +++++++++++++++++++++++++++----- tests/compiled/map_add.3.10.pyc | Bin 0 -> 532 bytes tests/compiled/map_add.3.7.pyc | Bin 0 -> 427 bytes tests/input/map_add.py | 28 ++++++++++++++++++++++ 4 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 tests/compiled/map_add.3.10.pyc create mode 100644 tests/compiled/map_add.3.7.pyc create mode 100644 tests/input/map_add.py diff --git a/ASTree.cpp b/ASTree.cpp index 3811da84e..979cfec14 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -1411,15 +1411,42 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) PycRef list = stack.top(); - if (curblock->blktype() == ASTBlock::BLK_FOR + if ((curblock->blktype() == ASTBlock::BLK_FOR) && curblock.cast()->isComprehension()) { stack.pop(); - stack.push(new ASTComprehension(value)); + stack.push(new ASTListComprehension(value)); } else { stack.push(new ASTSubscr(list, value)); /* Total hack */ } } break; + + case Pyc::MAP_ADD_A: + { + PycRef value; + PycRef key; + if (mod->verCompare(3, 8) >= 0) { + value = stack.top(); + stack.pop(); + key = stack.top(); + stack.pop(); + } else { + key = stack.top(); + stack.pop(); + value = stack.top(); + stack.pop(); + } + if (curblock->blktype() == ASTBlock::BLK_FOR + && curblock.cast()->isComprehension()){ + stack.pop(); + stack.push(new ASTDictComprehension(key, value)); + } else { + PycRef map = stack.top().cast(); + map->add(key, value); + } + + } + break; case Pyc::SET_UPDATE_A: { PycRef rhs = stack.top(); @@ -3060,8 +3087,10 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) pyc_output << "{ "; print_src(comp.cast()->key(), mod, pyc_output); pyc_output << ": "; - } else { + } else if (comp->comptype() == ASTComprehension::COMP_LIST) { pyc_output << "[ "; + } else { + fprintf(stderr, "Unsupported comprehension type %d in NODE_COMPREHENSION\n", comp->comptype()); } print_src(comp->result(), mod, pyc_output); @@ -3078,10 +3107,10 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) if (comp->comptype() == ASTComprehension::COMP_DICT) { pyc_output << "} "; - } - else - { + } else if (comp->comptype() == ASTComprehension::COMP_LIST){ pyc_output << "] "; + } else { + fprintf(stderr, "Unsupported comprehension type %d in NODE_COMPREHENSION\n", comp->comptype()); } } break; diff --git a/tests/compiled/map_add.3.10.pyc b/tests/compiled/map_add.3.10.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab8e63e7dbc4336adfa52ae0ef397ef0c7eee546 GIT binary patch literal 532 zcmYk%OH0E*5CGua$+St6rcEDO1nIq()*n!cpa)N0ya^(u>mg`uOj=N=C;bQg75=51 z1W!5ne%0s9i0Fp>c4xCQ%#M8DRWvS_yP*wAU2*V#SQM<%Or}Jr41F_gvr& zqAfaNL3Bk=EQ-EZ63b%1ZYxtU)pKQP>*`EvW$Ih1wUB9QA)_W$#$9E|F=ZxTD#U<`{tm9q1=naMO(IlNwd0-YDLr@}W1yAxbv1({pq3z`J1XY1 z3yM-`ucZ1ouJIu=uXe)2XfjSZ!+x~eH>Mq(q;A)QgYjTAjCxCSC*ka-iW{`FteD%8 zf-%2wv!zpQ{tEu=7U@swbcZ7|IoRuQMzTSGMh#yG@QD_DAi_K5;0^OIM;l(zffp>m dGrI7E9z0?Z9?*w-EWsU?;T8k9!3thi8{bqoXKMfe literal 0 HcmV?d00001 diff --git a/tests/compiled/map_add.3.7.pyc b/tests/compiled/map_add.3.7.pyc new file mode 100644 index 0000000000000000000000000000000000000000..67d8cd85f02cedc1ce9e85536045b93aa9c350ba GIT binary patch literal 427 zcmaivu};H442JE?H4TM8Q&15SVqnYCGFMSm0W%955<*qX5J70Tv=T^Rf;V901$d=Q zNKAQ!PWUfEY+%cueRuYMcgJDaA+?YFBx@U@Pb_|{r^Q6094X{~=eOvB#EADoVo&Lr znWS}2vEQ%?#F0a4^cHG7zydA62iiaYtO0mevkq(kA+QN-0bL+cq!5Kc7m6nu<$)rj ziZ1(4T(QuNbyl?6p^9J4z13HlMtjI%>|Iy2864I8;I^>|EPKQ3u2{^=VLmOcrXqvl zaoI^EOXuk 0)} + +# complete example with multiple conditions does not work, related to POP_JUMP_IF_FALSE +# if (x > 0 and x < 100 and x != 50) From b14e4f7fcc9f48ecb949e278d0fd23a8bcc54c3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elo=C3=AFse=20Brocas?= Date: Mon, 12 Jan 2026 13:50:34 +0100 Subject: [PATCH 06/12] add DICT_MERGE and CALL_FUNCTION_EX operand support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Eloïse Brocas --- ASTNode.h | 42 ++++--- ASTree.cpp | 165 +++++++++++++++++++++++---- tests/compiled/call_func_ex.3.10.pyc | Bin 0 -> 623 bytes tests/compiled/dict_merge.3.10.pyc | Bin 0 -> 206 bytes tests/input/call_func_ex.py | 22 ++++ tests/input/dict_merge.py | 5 + tests/tokenized/call_func_ex.txt | 17 +++ tests/tokenized/dict_merge.txt | 3 + 8 files changed, 215 insertions(+), 39 deletions(-) create mode 100644 tests/compiled/call_func_ex.3.10.pyc create mode 100644 tests/compiled/dict_merge.3.10.pyc create mode 100644 tests/input/call_func_ex.py create mode 100644 tests/input/dict_merge.py create mode 100644 tests/tokenized/call_func_ex.txt create mode 100644 tests/tokenized/dict_merge.txt diff --git a/ASTNode.h b/ASTNode.h index 941c95be7..eeab20765 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -286,13 +286,28 @@ class ASTCall : public ASTNode { typedef std::list> pparam_t; typedef std::list, PycRef>> kwparam_t; + ASTCall(PycRef func, pparam_t pparams, kwparam_t kwparams) : ASTNode(NODE_CALL), m_func(std::move(func)), m_pparams(std::move(pparams)), m_kwparams(std::move(kwparams)) { } + ASTCall(): ASTNode(NODE_CALL){} + + bool isKwparamUnpacked(std::pair, PycRef> value) + { + return value.first == NULL; + } + + std::pair, PycRef> genKwparamUnpacked(PycRef value){ + return std::make_pair(m_unpacked_marker, value); + } PycRef func() const { return m_func; } const pparam_t& pparams() const { return m_pparams; } const kwparam_t& kwparams() const { return m_kwparams; } + void setFunc(PycRef func) { m_func = std::move(func); } + void setPparams(pparam_t pparams) { m_pparams = std::move(pparams); } + void setKwparams(kwparam_t kwparams) { m_kwparams = std::move(kwparams); } + PycRef var() const { return m_var; } PycRef kw() const { return m_kw; } @@ -308,6 +323,7 @@ class ASTCall : public ASTNode { kwparam_t m_kwparams; PycRef m_var; PycRef m_kw; + const PycRef m_unpacked_marker = NULL; }; @@ -383,33 +399,31 @@ class ASTMap : public ASTNode { typedef std::list, PycRef>> map_t; ASTMap() : ASTNode(NODE_MAP) { } + ASTMap(enum ASTNode::Type subtype) : ASTNode(subtype) {} void add(PycRef key, PycRef value) { m_values.emplace_back(std::move(key), std::move(value)); } + void add_unpacked_value(PycRef variable) + { + m_values.emplace_back(m_unpacked_marker, std::move(variable)); + } + bool is_unpacked(std::pair , PycRef> value) + { + return value.first == m_unpacked_marker; + } const map_t& values() const { return m_values; } private: map_t m_values; + const PycRef m_unpacked_marker = NULL; }; -class ASTKwNamesMap : public ASTNode { +class ASTKwNamesMap : public ASTMap { public: - typedef std::list, PycRef>> map_t; - - ASTKwNamesMap() : ASTNode(NODE_KW_NAMES_MAP) { } - - void add(PycRef key, PycRef value) - { - m_values.emplace_back(std::move(key), std::move(value)); - } - - const map_t& values() const { return m_values; } - -private: - map_t m_values; + ASTKwNamesMap() : ASTMap(NODE_KW_NAMES_MAP) { } }; class ASTConstMap : public ASTNode { diff --git a/ASTree.cpp b/ASTree.cpp index 979cfec14..1f27e738a 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -1535,6 +1535,42 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack.push(new ASTTuple(values)); } break; + case Pyc::DICT_UPDATE_A: + case Pyc::DICT_MERGE_A: + { + PycRef rhs = stack.top(); + stack.pop(); + PycRef map = stack.top().cast(); + + switch (rhs.type()){ + case ASTNode::NODE_MAP: + case ASTNode::NODE_NAME: + case ASTNode::NODE_CALL: + case ASTNode::NODE_SUBSCR: + case ASTNode::NODE_BINARY: + map->add_unpacked_value(rhs); + break; + case ASTNode::NODE_CONST_MAP:{ + PycRef const_map = rhs.cast(); + PycTuple::value_t keys = const_map->keys().cast()->object().cast()->values(); + ASTConstMap::values_t values = const_map->values(); + + for (const auto &key : keys) + { + // Values are pushed onto the stack in reverse order. + PycRef value = values.back(); + values.pop_back(); + + map->add(new ASTObject(key), value); + } + } + break; + default: + fprintf(stderr, "Unsupported argument %i found for DICT_MERGE\n", rhs.type()); + break; + } + } + break; case Pyc::LOAD_ATTR_A: { PycRef name = stack.top(); @@ -2675,47 +2711,119 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) int has_kwmap = operand & 1; ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; + ASTCall *call = new ASTCall(); // callable, iterable object & kwmap object (if present) if (has_kwmap) { - PycRef object_or_map = stack.top(); - if (object_or_map.type() == ASTNode::NODE_KW_NAMES_MAP) { + PycRef kwmap_stack = stack.top(); + switch (kwmap_stack.type()){ + case ASTNode::NODE_KW_NAMES_MAP: + case ASTNode::NODE_MAP:{ stack.pop(); - PycRef kwparams_map = object_or_map.cast(); - for (ASTKwNamesMap::map_t::const_iterator it = kwparams_map->values().begin(); it != kwparams_map->values().end(); it++) { - kwparamList.push_front(std::make_pair(it->first, it->second)); + PycRef kwmap = kwmap_stack.cast(); + for (ASTMap::map_t::const_iterator it = kwmap->values().begin(); it != kwmap->values().end(); it++) + { + if (kwmap->is_unpacked(*it)) + { + kwparamList.push_back(call->genKwparamUnpacked(it->second)); + } + else + { + kwparamList.push_back(std::make_pair(it->first, it->second)); + } + } + break; + } + case ASTNode::NODE_CONST_MAP:{ + stack.pop(); + PycRef const_map = kwmap_stack.cast(); + PycTuple::value_t keys = const_map->keys().cast()->object().cast()->values(); + ASTConstMap::values_t values = const_map->values(); + + for (const auto &key : keys) + { + // Values are pushed onto the stack in reverse order. + PycRef value = values.back(); + values.pop_back(); + + kwparamList.push_back(std::make_pair(new ASTObject(key), value)); + } + break; + } + case ASTNode::NODE_OBJECT:{ + PycRef obj = kwmap_stack.cast()->object(); + if (obj.type() == PycObject::TYPE_DICT){ + for (const auto &it : obj.cast()->values()) + { + kwparamList.push_back(std::make_pair(new ASTObject(std::get<0>(it)), new ASTObject(std::get<1>(it)))); + } + } else { + fprintf(stderr, "Unsupported node object type %i\n", obj.type()); } + break; } - else { - fprintf(stderr, "Unexpected object type %i\n", object_or_map.type()); + default: + fprintf(stderr, "Unexpected object type %i for kwparams in CALL_FUNCTION_EX\n", kwmap_stack.type()); + break; } } PycRef iterable = stack.top(); stack.pop(); - // Not sure how to combine these two conditions - if (iterable.type() == ASTNode::NODE_LIST) { - PycRef list = iterable.cast(); - for (PycRef n: list->values()) { + switch (iterable.type()) { + case ASTNode::NODE_LIST: + for (PycRef n : iterable.cast()->values()) + { pparamList.push_back(n); } - } - else if (iterable.type() == ASTNode::NODE_TUPLE) { - PycRef tuple = iterable.cast(); - for (PycRef n: tuple->values()) { + break; + case ASTNode::NODE_TUPLE: + for (PycRef n : iterable.cast()->values()) + { pparamList.push_back(n); } + break; + case ASTNode::NODE_SET: + for (PycRef n : iterable.cast()->values()) + { + pparamList.push_back(n); + } + break; + case ASTNode::NODE_OBJECT: + switch (iterable.cast()->object().type()) + { + case PycObject::TYPE_LIST: + case PycObject::TYPE_SET: + case PycObject::TYPE_TUPLE: + case PycObject::TYPE_SMALL_TUPLE: + for (const auto &it : iterable.cast()->object().cast()->values()) + { + pparamList.push_back(new ASTObject(it)); + } + break; + default: + fprintf(stderr, "Unsupported node object type %i\n", iterable.cast()->object().type()); + break; + } + break; + case ASTNode::NODE_SUBSCR: + case ASTNode::NODE_BINARY: + case ASTNode::NODE_NAME: + pparamList.push_back(iterable); + break; + default: + fprintf(stderr, "Unsupported param iterable type %i in CALL_FUNC_EX\n", iterable.type()); + break; } - else { - fprintf(stderr, "Unsupported iterable type %i\n", iterable.type()); - } - PycRef func = stack.top(); stack.pop(); - - stack.push(new ASTCall(func, pparamList, kwparamList)); + + call->setFunc(func); + call->setKwparams(kwparamList); + call->setPparams(pparamList); + stack.push(call); } break; default: @@ -2963,7 +3071,9 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) for (const auto& param : call->kwparams()) { if (!first) pyc_output << ", "; - if (param.first.type() == ASTNode::NODE_NAME) { + if (call->isKwparamUnpacked(param)){ + pyc_output << "**"; + } else if (param.first.type() == ASTNode::NODE_NAME) { pyc_output << param.first.cast()->name()->value() << " = "; } else { PycRef str_name = param.first.cast()->object().cast(); @@ -3119,14 +3229,19 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) pyc_output << "{"; bool first = true; cur_indent++; - for (const auto& val : node.cast()->values()) { + PycRef map = node.cast(); + for (const auto& val : map->values()) { if (first) pyc_output << "\n"; else pyc_output << ",\n"; start_line(cur_indent, pyc_output); - print_src(val.first, mod, pyc_output); - pyc_output << ": "; + if (map->is_unpacked(val)){ + pyc_output << "**"; + } else { + print_src(val.first, mod, pyc_output); + pyc_output << ": "; + } print_src(val.second, mod, pyc_output); first = false; } diff --git a/tests/compiled/call_func_ex.3.10.pyc b/tests/compiled/call_func_ex.3.10.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e32007e1ea7f38135935f704c91f5ea1b3bdd61 GIT binary patch literal 623 zcmY*VJx{|x3_V{iY5EZf2~<|rfJ8+rHiVFnSdcoDElU;RYZcKi-K7F%nE4SEOaD?< zru+pa>}w0flI`a^+xOnhqo^Us&W_T21VVgqv2Tis8*)3KfCAxNBmyORltIIr0D%hq z91@8^;$aO#!V2rypblUAW1-~=w6}tRz$V{n^$8z9bFBvvN13F>JM#z-3vo|w0}6G4 z9(AeyT~(v93Ms5b>?J-fr_29Nu}V~G#uqMj zUF?;?(=(mfJt~d`j1FnSF;HL*2$#O=?8m`|JF_w^zJNy)alu`Cd-zG?mW5Z-IW8yq Tn)z973n2^w6?zH?$rJAf$M|R^ literal 0 HcmV?d00001 diff --git a/tests/compiled/dict_merge.3.10.pyc b/tests/compiled/dict_merge.3.10.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc96a773f7f0dd978d765608f219fd95975e74fa GIT binary patch literal 206 zcmd1j<>g`kf{R}gGK+!qV-N=!umU*_KwNACBvKes7&9587*ZKi*fJSXm{OQC8J94o zGAv>2V`K!2rm&>2WHO@5q_74vXtLj8OnS)(H02g!@=GQ#o#?lcL6fNnEXb0QnOsuD y1{6mSzeM#j^9o8!iuFN~@wur*>8W}JmA5!-a`RJ4b5iX + +return 8 + +def start ( ) : + +function_5 ( 1 , 2 , ** { 'test' : 42 } ) + +start ( ) +a = { 'a' : 1 } +b = ( 1 , 2 ) +c = { 'c' : 0 } +def f ( arg1 , arg2 , arg3 , ** kwargs ) : + +return 1 + +f ( * a , * b , kwarg = 0 , ** c ) diff --git a/tests/tokenized/dict_merge.txt b/tests/tokenized/dict_merge.txt new file mode 100644 index 000000000..93478713f --- /dev/null +++ b/tests/tokenized/dict_merge.txt @@ -0,0 +1,3 @@ +b = { 'b' : 1 } +dict ( ** { 'c' : 2 } , ** b ) +dict ( a = 'a' , ** { 'c' : 2 } , ** b ) From 8006f941c0fa25fc28cef314f94ea8689b6b8b62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elo=C3=AFse=20Brocas?= Date: Tue, 27 Jan 2026 15:12:12 +0100 Subject: [PATCH 07/12] add unpack_ex opcode support --- ASTNode.h | 33 ++++++++++++++++++-- ASTree.cpp | 50 ++++++++++++++++++++++++++---- tests/compiled/unpack_ex.3.10.pyc | Bin 0 -> 528 bytes tests/input/unpack_ex.py | 39 +++++++++++++++++++++++ tests/tokenized/unpack_ex.txt | 19 ++++++++++++ 5 files changed, 133 insertions(+), 8 deletions(-) create mode 100644 tests/compiled/unpack_ex.3.10.pyc create mode 100644 tests/input/unpack_ex.py create mode 100644 tests/tokenized/unpack_ex.txt diff --git a/ASTNode.h b/ASTNode.h index eeab20765..223d4fb95 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -18,7 +18,7 @@ class ASTNode { NODE_COMPREHENSION, NODE_LOADBUILDCLASS, NODE_AWAITABLE, NODE_FORMATTEDVALUE, NODE_JOINEDSTR, NODE_CONST_MAP, NODE_ANNOTATED_VAR, NODE_CHAINSTORE, NODE_TERNARY, - NODE_KW_NAMES_MAP, NODE_CALL_INTRINSIC_1, NODE_CALL_INTRINSIC_2, + NODE_KW_NAMES_MAP, NODE_CALL_INTRINSIC_1, NODE_CALL_INTRINSIC_2, NODE_UNPACKED_TUPLE, // Empty node types NODE_LOCALS, @@ -355,6 +355,7 @@ class ASTTuple : public ASTNode { ASTTuple(value_t values) : ASTNode(NODE_TUPLE), m_values(std::move(values)), m_requireParens(true) { } + ASTTuple(enum ASTNode::Type subtype, value_t values, bool requireParens) : ASTNode(subtype), m_values(std::move(values)), m_requireParens(requireParens) {} const value_t& values() const { return m_values; } void add(PycRef name) { m_values.emplace_back(std::move(name)); } @@ -362,11 +363,39 @@ class ASTTuple : public ASTNode { void setRequireParens(bool require) { m_requireParens = require; } bool requireParens() const { return m_requireParens; } -private: +protected: value_t m_values; bool m_requireParens; }; +class ASTUnpackedTuple : public ASTTuple { +public: + ASTUnpackedTuple(u_int8_t before, u_int8_t after) : ASTTuple(NODE_UNPACKED_TUPLE, value_t(), true), m_before(std::move(before)), m_after(std::move(after)){} + + void add(PycRef value){ + if (m_before > 0){ + m_before--; + } else if (!m_unpackedValueAdded){ + m_unpackedValueAdded = true; + value.setUnpacked(true); + } else if (m_after > 0){ + m_after--; + } else{ + fputs("Cannot add new value to unpacked tuple!\n", stderr); + return; + } + setRequireParens(false); + m_values.emplace_back(std::move(value)); + } + bool isFull() { return m_unpackedValueAdded and (m_values.size() > (m_before + m_after)); } + u_int8_t unpackedBefore() { return m_before; } + u_int8_t unpackedAfter() { return m_after; } + +private: + u_int8_t m_before; + bool m_unpackedValueAdded = false; + u_int8_t m_after; +}; class ASTList : public ASTNode { public: diff --git a/ASTree.cpp b/ASTree.cpp index 1f27e738a..cfb7c4923 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -2308,12 +2308,38 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) PycRef name = new ASTName(code->getName(operand)); PycRef tup = stack.top(); - if (tup.type() == ASTNode::NODE_TUPLE) + switch (tup.type()){ + case ASTNode::Type::NODE_TUPLE: tup.cast()->add(name); - else - fputs("Something TERRIBLE happened!\n", stderr); + break; + case ASTNode::Type::NODE_UNPACKED_TUPLE: + tup.cast()->add(name); + break; + default: + fprintf(stderr, "Unsupported iterable type %i\n", tup->type()); + break; + } + unpack--; - if (--unpack <= 0) { + while (unpack > 0 + and tup->type() == ASTNode::Type::NODE_UNPACKED_TUPLE + and tup.cast()->isFull() + ) + { + PycRef val = tup; + stack.pop(); + if (stack.top()->type() == ASTNode::Type::NODE_UNPACKED_TUPLE + and not stack.top().cast()->isFull()) + { + stack.top().cast()->add(tup); + tup = stack.top(); + unpack--; + } else { + stack.push(tup); + break; + } + } + if (unpack <= 0){ stack.pop(); PycRef seq = stack.top(); stack.pop(); @@ -2552,6 +2578,15 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } } break; + case Pyc::UNPACK_EX_A: + { + uint8_t before = operand & 0xFF; + uint8_t after = (operand >> 8) & 0xFF; + + stack.push(new ASTUnpackedTuple(before, after)); + unpack += (after + before + 1); + } + break; case Pyc::YIELD_FROM: { PycRef dest = stack.top(); @@ -3667,10 +3702,13 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) } break; case ASTNode::NODE_TUPLE: + case ASTNode::NODE_UNPACKED_TUPLE: { PycRef tuple = node.cast(); ASTTuple::value_t values = tuple->values(); - if (tuple->requireParens()) + if (tuple.isUnpacked()) + pyc_output << "*("; + else if (tuple->requireParens()) pyc_output << "("; bool first = true; for (const auto& val : values) { @@ -3681,7 +3719,7 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) } if (values.size() == 1) pyc_output << ','; - if (tuple->requireParens()) + if (tuple->requireParens() or tuple.isUnpacked()) pyc_output << ')'; } break; diff --git a/tests/compiled/unpack_ex.3.10.pyc b/tests/compiled/unpack_ex.3.10.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2e20f1b8128f1ed811f24d08bb6a7c9f61845e7 GIT binary patch literal 528 zcmYjNF-`+95cGO|mrF>Z6G8%|prE^kd;lRNs41Yc#Gw$IQ$)BNP9lXmAIKZh`2tsa6Y~t58(wl5I&nrpDQmbBtL@DEJCodtthvD&=*1XqDI`+A(PdX0Dlw+r3r(>UE-a^$E5yr7Fc9cCiP)s^)1V literal 0 HcmV?d00001 diff --git a/tests/input/unpack_ex.py b/tests/input/unpack_ex.py new file mode 100644 index 000000000..e87860301 --- /dev/null +++ b/tests/input/unpack_ex.py @@ -0,0 +1,39 @@ +## Tests from CPython tests (Lib/test/test_unpack_ex.py) +# Unpack tuple +t = (1, 2, 3) +a, *b, c = (1, 2, 3) + +# Unpack list +l = [4, 5, 6] +a, *b = l +a, *b, c = [4,5,6] + +#Unpack implied tuple +*a, = 7, 8, 9 + +# Unpack nested implied tuple +[*[*a],b] = [[7, 8, 9]] # note for tests, another notation possible is: *(*a,),b = [[7, 8, 9]] +[*[*a]] = [[7, 8, 9]] # note for tests, another notation possible is: *(*a,), = [[7, 8, 9]] + +# Unpack string... fun! +a, *b = "one" + +# Unpack long sequence +a, b, c, *d, e, f, g = range(10) + +# Unpack short sequence +a, *b, c = (1, 2) + +# Unpack in for statement +for a, *b, c in [(1,2,3), (4,5,6,7)]: + print(a, b, c) + +# Unpack in list +[a, *b, c] = range(5) + +# Multiple targets +a, *b, c = *d, e = range(5) + +# Assignment unpacking +a, b, *c = range(5) +*a, b, c = a, b, *c \ No newline at end of file diff --git a/tests/tokenized/unpack_ex.txt b/tests/tokenized/unpack_ex.txt new file mode 100644 index 000000000..01439b3e1 --- /dev/null +++ b/tests/tokenized/unpack_ex.txt @@ -0,0 +1,19 @@ +t = ( 1 , 2 , 3 ) +a , * b , c = ( 1 , 2 , 3 ) +l = [ 4 , 5 , 6 ] +a , * b = l +a , * b , c = [ 4 , 5 , 6 ] +* a , = ( 7 , 8 , 9 ) +* ( * a , ) , b = [ [ 7 , 8 , 9 ] ] +* ( * a , ) , = [ [ 7 , 8 , 9 ] ] +a , * b = 'one' +a , b , c , * d , e , f , g = range ( 10 ) +a , * b , c = ( 1 , 2 ) +for a , * b , c in ( ( 1 , 2 , 3 ) , ( 4 , 5 , 6 , 7 ) ) : + +print ( a , b , c ) + +a , * b , c = range ( 5 ) +a , * b , c = * d , e = range ( 5 ) +a , b , * c = range ( 5 ) +* a , b , c = ( a , b , * c ) From 31b84750cbaa321b86e050bd90c11ed76742578f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elo=C3=AFse=20Brocas?= Date: Mon, 2 Feb 2026 17:34:45 +0100 Subject: [PATCH 08/12] add load_assert_error support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Eloïse Brocas --- ASTNode.h | 15 ++++++- ASTree.cpp | 61 +++++++++++++++++++++++++++- tests/compiled/load_assert.3.10.pyc | Bin 0 -> 217 bytes tests/input/load_assert.py | 5 +++ tests/tokenized/load_assert.txt | 4 ++ 5 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 tests/compiled/load_assert.3.10.pyc create mode 100644 tests/input/load_assert.py create mode 100644 tests/tokenized/load_assert.txt diff --git a/ASTNode.h b/ASTNode.h index 223d4fb95..7a46e33de 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -18,7 +18,7 @@ class ASTNode { NODE_COMPREHENSION, NODE_LOADBUILDCLASS, NODE_AWAITABLE, NODE_FORMATTEDVALUE, NODE_JOINEDSTR, NODE_CONST_MAP, NODE_ANNOTATED_VAR, NODE_CHAINSTORE, NODE_TERNARY, - NODE_KW_NAMES_MAP, NODE_CALL_INTRINSIC_1, NODE_CALL_INTRINSIC_2, NODE_UNPACKED_TUPLE, + NODE_KW_NAMES_MAP, NODE_CALL_INTRINSIC_1, NODE_CALL_INTRINSIC_2, NODE_UNPACKED_TUPLE, NODE_ASSERT, // Empty node types NODE_LOCALS, @@ -552,6 +552,19 @@ class ASTRaise : public ASTNode { param_t m_params; }; +class ASTAssert : public ASTNode { +public: + ASTAssert() : ASTNode(NODE_ASSERT) {} + + const PycRef cond() const { return m_cond; } + const PycRef msg() const { return m_msg; } + void setCond(PycRef cond) { m_cond = std::move(cond); } + void setMsg(PycRef msg) { m_msg = std::move(msg); } + +private : + PycRef m_cond; + PycRef m_msg; +}; class ASTExec : public ASTNode { public: diff --git a/ASTree.cpp b/ASTree.cpp index cfb7c4923..355480574 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -522,7 +522,16 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack.pop(); } - stack.push(new ASTCall(func, pparamList, kwparamList)); + if (func->type() == ASTNode::NODE_ASSERT){ + if (pparamList.size() > 1){ + fprintf(stderr, "Assert can only have one message\n"); + } else { + func.cast()->setMsg(pparamList.front()); + stack.push(func); + } + } else { + stack.push(new ASTCall(func, pparamList, kwparamList)); + } } break; case Pyc::CALL_FUNCTION_VAR_A: @@ -1571,6 +1580,34 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } } break; + case Pyc::LOAD_ASSERTION_ERROR: + { + PycRef assertion = new ASTAssert(); + switch (curblock->blktype()){ + case ASTBlock::BLK_IF: + case ASTBlock::BLK_ELIF:{ + if (stack_hist.size()) + { + stack = stack_hist.top(); + stack_hist.pop(); + } + + PycRef prev = curblock.cast(); + blocks.pop(); + curblock = blocks.top(); + assertion.cast()->setCond(prev->cond()); + } + break; + case ASTBlock::BLK_MAIN: + assertion.cast()->setCond(new ASTObject(new PycObject(PycObject::TYPE_FALSE))); + break; + default: + fprintf(stderr, "Unsupported block type %i found for LOAD_ASSERTION_ERROR\n", curblock->blktype()); + break; + } + stack.push(assertion); + } + break; case Pyc::LOAD_ATTR_A: { PycRef name = stack.top(); @@ -1908,6 +1945,12 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) break; case Pyc::RAISE_VARARGS_A: { + if (operand == 1 and stack.top()->type() == ASTNode::NODE_ASSERT){ + curblock->append(stack.top()); + stack.pop(); + break; + } + ASTRaise::param_t paramList; for (int i = 0; i < operand; i++) { paramList.push_front(stack.top()); @@ -3413,7 +3456,21 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) } } break; - case ASTNode::NODE_RETURN: + case ASTNode::NODE_ASSERT: + { + PycRef assertion = node.cast(); + pyc_output << "assert "; + if (assertion->cond()){ + print_src(assertion->cond(), mod, pyc_output); + if (assertion->msg()){ + pyc_output << ", "; + print_src(assertion->msg(), mod, pyc_output); + } + } + + } + break; + case ASTNode::NODE_RETURN: { PycRef ret = node.cast(); PycRef value = ret->value(); diff --git a/tests/compiled/load_assert.3.10.pyc b/tests/compiled/load_assert.3.10.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36623d8619aa3968649f0023326bc8e80db64dd1 GIT binary patch literal 217 zcmd1j<>g`kf_>{-GdqCvV-N=!FabFZKwNACBvKiQS-cpU7*iNh7_ym)dAt}>7@HZJ z7*iQin6g=lBvKinG*b#QP+kTo&jOMM>Ct3;$p|zeilrpKB>yFl#gOUX#LrO0nOano zU!;&*oUX}yi!m{ZDJdz6IXO9bB|{MlP!X8;C8nR5S5R6~te=ygm=d2@T%1}|qE}FP ci^C>2KczG$)edBIF$a*~U=(0fU}Rwg00f~gWB>pF literal 0 HcmV?d00001 diff --git a/tests/input/load_assert.py b/tests/input/load_assert.py new file mode 100644 index 000000000..d9cb53f85 --- /dev/null +++ b/tests/input/load_assert.py @@ -0,0 +1,5 @@ +# Some tests come from CPython tests +assert a +assert 1 == 1, "toto" +assert a > 0 and bb > 0 and ccc == 1000000, "error msg" +assert False \ No newline at end of file diff --git a/tests/tokenized/load_assert.txt b/tests/tokenized/load_assert.txt new file mode 100644 index 000000000..df3d8366b --- /dev/null +++ b/tests/tokenized/load_assert.txt @@ -0,0 +1,4 @@ +assert a +assert 1 == 1 , 'toto' +assert a > 0 and bb > 0 or ccc == 1000000 , 'error msg' +assert False From c27bdd1659af256b6633e10cadb1f7a1b555d91d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elo=C3=AFse=20Brocas?= Date: Mon, 2 Mar 2026 13:43:45 +0100 Subject: [PATCH 09/12] create a dedicated object for exception blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Eloïse Brocas --- ASTNode.h | 24 ++++++++++++++++++++++++ ASTree.cpp | 22 +++++++++++----------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/ASTNode.h b/ASTNode.h index 7a46e33de..54451e7d9 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -680,6 +680,30 @@ class ASTContainerBlock : public ASTBlock { int m_except; }; +class ASTExceptBlock : public ASTBlock +{ +public: + ASTExceptBlock(int end) + : ASTBlock(ASTBlock::BLK_EXCEPT, end) {} + ASTExceptBlock(int end, PycRef expr) + : ASTBlock(ASTBlock::BLK_EXCEPT, end), m_expr(std::move(expr)) {} + + PycRef expr() const { return m_expr; } + PycRef var() const { return m_var; } + + void setExpr(PycRef expr) + { + m_expr = std::move(expr); + init(); + } + void setVar(PycRef var) { m_var = std::move(var); } + +private: + PycRef m_expr; + PycRef m_var; // optional value +}; + + class ASTWithBlock : public ASTBlock { public: ASTWithBlock(int end) diff --git a/ASTree.cpp b/ASTree.cpp index 355480574..5c0f2719a 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -1058,7 +1058,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) case Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A: { PycRef cond = stack.top(); - PycRef ifblk; + PycRef ifblk; int popped = ASTCondBlock::UNINITED; if (opcode == Pyc::POP_JUMP_IF_FALSE_A @@ -1103,15 +1103,15 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) if (cond.type() == ASTNode::NODE_COMPARE && cond.cast()->op() == ASTCompare::CMP_EXCEPTION) { - if (curblock->blktype() == ASTBlock::BLK_EXCEPT - && curblock.cast()->cond() == NULL) { + if (curblock->blktype() == ASTBlock::BLK_EXCEPT && curblock.cast()->expr() == NULL) + { blocks.pop(); curblock = blocks.top(); stack_hist.pop(); } - ifblk = new ASTCondBlock(ASTBlock::BLK_EXCEPT, offs, cond.cast()->right(), false); + ifblk = new ASTExceptBlock(offs, cond.cast()->right()); } else if (curblock->blktype() == ASTBlock::BLK_ELSE && curblock->size() == 0) { /* Collapse into elif statement */ @@ -1234,7 +1234,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); if (cont->hasExcept() && pos < cont->except()) { - PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, 0, NULL, false); + PycRef except = new ASTExceptBlock(0); except->init(); blocks.push(except); curblock = blocks.top(); @@ -1274,7 +1274,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) if (push) { stack_hist.push(stack); } - PycRef next = new ASTCondBlock(ASTBlock::BLK_EXCEPT, blocks.top()->end(), NULL, false); + PycRef next = new ASTExceptBlock(blocks.top()->end()); next->init(); blocks.push(next.cast()); @@ -1309,7 +1309,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack_hist.push(stack); curblock->setEnd(pos+offs); - PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); + PycRef except = new ASTExceptBlock(pos + offs); except->init(); blocks.push(except); curblock = blocks.top(); @@ -1359,7 +1359,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) if (push) { stack_hist.push(stack); } - PycRef next = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); + PycRef next = new ASTExceptBlock(pos + offs); next->init(); blocks.push(next.cast()); @@ -1390,7 +1390,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack_hist.push(stack); } - PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); + PycRef except = new ASTExceptBlock(pos + offs); except->init(); blocks.push(except); } @@ -3393,9 +3393,9 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) pyc_output << " in "; print_src(blk.cast()->iter(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_EXCEPT && - blk.cast()->cond() != NULL) { + blk.cast()->expr() != NULL) { pyc_output << " "; - print_src(blk.cast()->cond(), mod, pyc_output); + print_src(blk.cast()->expr(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_WITH) { pyc_output << " "; print_src(blk.cast()->expr(), mod, pyc_output); From d35347e56907437d272ab73cda41e86081900a8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elo=C3=AFse=20Brocas?= Date: Tue, 3 Mar 2026 10:52:11 +0100 Subject: [PATCH 10/12] add support of jump_if_not_exc_match and reraise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Eloïse Brocas --- ASTNode.h | 1 + ASTree.cpp | 184 +++++++++++++++--- tests/compiled/jump_if_not_exc_match.3.10.pyc | Bin 0 -> 441 bytes tests/compiled/reraise.3.10.pyc | Bin 0 -> 569 bytes tests/input/jump_if_not_exc_match.py | 15 ++ tests/input/reraise.py | 17 ++ tests/tokenized/jump_if_not_exc_match.txt | 27 +++ tests/tokenized/reraise.txt | 31 +++ 8 files changed, 246 insertions(+), 29 deletions(-) create mode 100644 tests/compiled/jump_if_not_exc_match.3.10.pyc create mode 100644 tests/compiled/reraise.3.10.pyc create mode 100644 tests/input/jump_if_not_exc_match.py create mode 100644 tests/input/reraise.py create mode 100644 tests/tokenized/jump_if_not_exc_match.txt create mode 100644 tests/tokenized/reraise.txt diff --git a/ASTNode.h b/ASTNode.h index 54451e7d9..8aea23caf 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -674,6 +674,7 @@ class ASTContainerBlock : public ASTBlock { int except() const { return m_except; } void setExcept(int except) { m_except = except; } + void setFinally(int finally) { m_finally = finally; } private: int m_finally; diff --git a/ASTree.cpp b/ASTree.cpp index 5c0f2719a..278702cc4 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -41,6 +41,21 @@ static PycRef StackPopTop(FastStack& stack) return node; } +static FastStack &StackHistPopTop(FastStack &stack, stackhist_t &stack_hist) +{ + if (!stack_hist.empty()){ + stack = stack_hist.top(); + stack_hist.pop(); + } + return stack; +} + +static void StackPopIfNotEmpty(stackhist_t &stack) +{ + if (!stack.empty()) + stack.pop(); +} + /* compiler generates very, VERY similar byte code for if/else statement block and if-expression * statement * if a: b = 1 @@ -91,6 +106,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) int curpos = 0; int pos = 0; int unpack = 0; + int goto_addr = 0; bool else_pop = false; bool need_try = false; bool variable_annotations = false; @@ -112,12 +128,17 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) curpos = pos; bc_next(source, mod, opcode, operand, pos); + if (goto_addr != 0 && goto_addr >= pos){ + continue; + } + if (need_try && opcode != Pyc::SETUP_EXCEPT_A) { need_try = false; /* Store the current stack for the except/finally statement(s) */ stack_hist.push(stack); PycRef tryblock = new ASTBlock(ASTBlock::BLK_TRY, curblock->end(), true); + tryblock->setEnd(blocks.top()->end()); blocks.push(tryblock); curblock = blocks.top(); } else if (else_pop @@ -1050,6 +1071,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) case Pyc::JUMP_IF_TRUE_A: case Pyc::JUMP_IF_FALSE_OR_POP_A: case Pyc::JUMP_IF_TRUE_OR_POP_A: + case Pyc::JUMP_IF_NOT_EXC_MATCH_A: case Pyc::POP_JUMP_IF_FALSE_A: case Pyc::POP_JUMP_IF_TRUE_A: case Pyc::POP_JUMP_FORWARD_IF_FALSE_A: @@ -1061,6 +1083,15 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) PycRef ifblk; int popped = ASTCondBlock::UNINITED; + if (opcode == Pyc::JUMP_IF_NOT_EXC_MATCH_A) + { + stack.pop(); + PycRef comp = new ASTCompare(stack.top(), cond, ASTCompare::CMP_EXCEPTION); + stack.pop(); + cond = comp.cast(); + popped = ASTCondBlock::POPPED; + } + if (opcode == Pyc::POP_JUMP_IF_FALSE_A || opcode == Pyc::POP_JUMP_IF_TRUE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_FALSE_A @@ -1103,12 +1134,46 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) if (cond.type() == ASTNode::NODE_COMPARE && cond.cast()->op() == ASTCompare::CMP_EXCEPTION) { - if (curblock->blktype() == ASTBlock::BLK_EXCEPT && curblock.cast()->expr() == NULL) - { + if ((curblock->blktype() == ASTBlock::BLK_EXCEPT && curblock.cast()->expr() == NULL) + || (curblock->blktype() == ASTBlock::BLK_FINALLY && curblock->size() == 0)){ blocks.pop(); curblock = blocks.top(); - stack_hist.pop(); + StackPopIfNotEmpty(stack_hist); + if (mod->verCompare(3,9) >= 0){ + StackPopIfNotEmpty(stack_hist); + } + } + if (mod->verCompare(3, 9) >= 0){ + blocks.pop(); + if (!blocks.empty() && blocks.top()->blktype() != ASTBlock::BLK_MAIN && blocks.top()->end() < pos) + { + stack = StackHistPopTop(stack, stack_hist); + + PycRef tmp = curblock; + curblock = blocks.top(); + + if (tmp->blktype() != ASTBlock::BLK_ELSE && tmp->nodes().size() > 0){ + curblock->append(tmp.cast()); + } + stack = StackHistPopTop(stack, stack_hist); + + tmp = curblock; + blocks.pop(); + curblock = blocks.top(); + + if (!(tmp->blktype() == ASTBlock::BLK_ELSE && tmp->nodes().size() == 0)){ + curblock->append(tmp.cast()); + } + } else { + blocks.push(curblock); + } + if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { + curblock.cast()->setExcept(pos); + curblock.cast()->setFinally(0); + curblock.cast()->setEnd(offs); + } + stack_hist.push(stack); } ifblk = new ASTExceptBlock(offs, cond.cast()->right()); @@ -1303,9 +1368,20 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) if (mod->verCompare(3, 10) >= 0) offs *= sizeof(uint16_t); // // BPO-27129 + if (mod->verCompare(3, 9) >= 0){ + if (curblock->blktype() == ASTBlock::BLK_FINALLY){ + blocks.pop(); + curblock = blocks.top(); + StackPopIfNotEmpty(stack_hist); + } + if (curblock->blktype() == ASTBlock::BLK_EXCEPT || curblock->blktype() == ASTBlock::BLK_ELSE){ + break; + } + } + if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); - if (cont->hasExcept()) { + if (cont->hasExcept() && cont->except() >= pos) { stack_hist.push(stack); curblock->setEnd(pos+offs); @@ -1380,8 +1456,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } else if (prev->blktype() == ASTBlock::BLK_TRY && prev->end() < pos+offs) { /* Need to add an except/finally block */ - stack = stack_hist.top(); - stack.pop(); + stack = StackHistPopTop(stack, stack_hist); if (blocks.top()->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = blocks.top().cast(); @@ -1725,15 +1800,17 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) break; case Pyc::NOP: break; + case Pyc::POP_EXCEPT: + case Pyc::RERAISE_A: case Pyc::POP_BLOCK: { - if (curblock->blktype() == ASTBlock::BLK_CONTAINER || - curblock->blktype() == ASTBlock::BLK_FINALLY) { - /* These should only be popped by an END_FINALLY */ - break; + if (mod->verCompare(3, 9) < 0) { + if (opcode == Pyc::POP_EXCEPT) { + break; + } } - if (curblock->blktype() == ASTBlock::BLK_WITH) { + if (curblock->blktype() == ASTBlock::BLK_WITH and mod->verCompare(3,9) <0) { // This should only be popped by a WITH_CLEANUP break; } @@ -1762,8 +1839,8 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) if (!blocks.empty()) curblock = blocks.top(); - if (!(tmp->blktype() == ASTBlock::BLK_ELSE - && tmp->nodes().size() == 0)) { + if (!((tmp->blktype() == ASTBlock::BLK_ELSE || tmp->blktype() == ASTBlock::BLK_FINALLY) + && tmp->nodes().size() == 0)){ curblock->append(tmp.cast()); } @@ -1795,15 +1872,17 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); - if (tmp->blktype() == ASTBlock::BLK_ELSE && !cont->hasFinally()) { + if ((tmp->blktype() == ASTBlock::BLK_ELSE && !cont->hasFinally() && !cont->hasExcept()) + || tmp->blktype() == ASTBlock::BLK_FINALLY) { /* Pop the container */ blocks.pop(); curblock = blocks.top(); curblock->append(cont.cast()); - - } else if ((tmp->blktype() == ASTBlock::BLK_ELSE && cont->hasFinally()) - || (tmp->blktype() == ASTBlock::BLK_TRY && !cont->hasExcept())) { + } else if (opcode != Pyc::RERAISE_A + && (((tmp->blktype() == ASTBlock::BLK_ELSE && cont->hasFinally()) + || (tmp->blktype() == ASTBlock::BLK_TRY && !cont->hasExcept()) + || (tmp->blktype() == ASTBlock::BLK_EXCEPT)))) { /* Add the finally block */ stack_hist.push(stack); @@ -1811,9 +1890,21 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) PycRef final = new ASTBlock(ASTBlock::BLK_FINALLY, 0, true); blocks.push(final); curblock = blocks.top(); + } else if (opcode == Pyc::RERAISE_A){ + stack = StackHistPopTop(stack, stack_hist); + } + + if (mod->verCompare(3,9) >= 0){ + if (tmp->end() > curblock->end()){ + cont->setEnd(tmp->end()); + } + goto_addr = tmp->end(); } } + if (tmp->blktype() == ASTBlock::BLK_CONTAINER && opcode == Pyc::RERAISE_A) + stack = StackHistPopTop(stack, stack_hist); + if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR) && curblock->end() == pos) { blocks.pop(); @@ -1822,9 +1913,6 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } } break; - case Pyc::POP_EXCEPT: - /* Do nothing. */ - break; case Pyc::END_FOR: { stack.pop(); @@ -1959,16 +2047,24 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) curblock->append(new ASTRaise(paramList)); if ((curblock->blktype() == ASTBlock::BLK_IF - || curblock->blktype() == ASTBlock::BLK_ELSE) + || curblock->blktype() == ASTBlock::BLK_ELSE + || curblock->blktype() == ASTBlock::BLK_TRY + || curblock->blktype() == ASTBlock::BLK_EXCEPT) && stack_hist.size() && (mod->verCompare(2, 6) >= 0)) { stack = stack_hist.top(); stack_hist.pop(); PycRef prev = curblock; - blocks.pop(); - curblock = blocks.top(); - curblock->append(prev.cast()); + if (!blocks.empty()){ + blocks.pop(); + curblock = blocks.top(); + curblock->append(prev.cast()); + } + if (prev->end() > curblock->end()){ + curblock->setEnd(prev->end()); + } + goto_addr = prev->end(); } } break; @@ -2093,6 +2189,8 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) { if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { curblock.cast()->setExcept(pos+operand); + if (mod->verCompare(3,9) >=0) + curblock.cast()->setFinally(0); } else { PycRef next = new ASTContainerBlock(0, pos+operand); blocks.push(next.cast()); @@ -2109,7 +2207,15 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) break; case Pyc::SETUP_FINALLY_A: { - PycRef next = new ASTContainerBlock(pos+operand); + int offs = operand; + if (mod->verCompare(3, 10) >= 0) + offs *= sizeof(uint16_t); + if (curblock->blktype() == ASTBlock::BLK_EXCEPT && curblock->size() == 0){ + break; + } + + PycRef next = new ASTContainerBlock(pos + offs); + next->setEnd(pos + offs); blocks.push(next.cast()); curblock = blocks.top(); @@ -2297,6 +2403,11 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) curblock.cast()->setVar(name); } else if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); + } else if (curblock->blktype() == ASTBlock::BLK_EXCEPT + && curblock->size() == 0 + && curblock.cast()->var() == NULL + && value->type() == ASTNode::NODE_INVALID) { + curblock.cast()->setVar(name); } else { curblock->append(new ASTStore(value, name)); } @@ -2428,9 +2539,16 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) && !curblock->inited()) { curblock.cast()->setExpr(value); curblock.cast()->setVar(name); + } else if (curblock->blktype() == ASTBlock::BLK_EXCEPT + && curblock->size() == 0 + && curblock.cast()->var() == NULL + && value->type() == ASTNode::NODE_INVALID) { + curblock.cast()->setVar(name); } else if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); - } else { + } + else + { curblock->append(new ASTStore(value, name)); if (value.type() == ASTNode::NODE_INVALID) @@ -2910,10 +3028,12 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) return new ASTNodeList(defblock->nodes()); } - else_pop = ( (curblock->blktype() == ASTBlock::BLK_ELSE) + else_pop = ((curblock->blktype() == ASTBlock::BLK_ELSE) || (curblock->blktype() == ASTBlock::BLK_IF) - || (curblock->blktype() == ASTBlock::BLK_ELIF) ) - && (curblock->end() == pos); + || (curblock->blktype() == ASTBlock::BLK_ELIF) + || (curblock->blktype() == ASTBlock::BLK_FINALLY) + || (curblock->blktype() == ASTBlock::BLK_EXCEPT)) + && (curblock->end() == pos || goto_addr == curblock->end()); } if (stack_hist.size()) { @@ -3396,6 +3516,12 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) blk.cast()->expr() != NULL) { pyc_output << " "; print_src(blk.cast()->expr(), mod, pyc_output); + PycRef var = blk.try_cast()->var(); + if (var != NULL) + { + pyc_output << " as "; + print_src(var, mod, pyc_output); + } } else if (blk->blktype() == ASTBlock::BLK_WITH) { pyc_output << " "; print_src(blk.cast()->expr(), mod, pyc_output); diff --git a/tests/compiled/jump_if_not_exc_match.3.10.pyc b/tests/compiled/jump_if_not_exc_match.3.10.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fa00723b209dba349807f67dc0153564694d67c GIT binary patch literal 441 zcmaKmzfQw25XSEuw}k!y6(NMgl7XcQg&k0l=*q%?8ipt|u4+V`$VnTL35j<>V(Y^& zS!_&w1yEJaR7FhKy6<%7-+jKg*{lJbx7mw)r~F}yzsJ6IDbg89S&bmV1MuMnRGovs zqJ>~@M7=&J%%SWO#eS(nO9?h)#R-D%Ae1%1h4YL^9({-$-???v{wn84bfq|3c?uqJ z%aQMPPIQDmIr}to{cpAAa%flPNNdk2V9rRc%DrqPwD9sU^+M%|$AL&P8LRe&!O*mP zq2o(Al_|A)TF2UKU8Jed7FpFeHI+n5m6^@ndf4FIE9g4;|H!A;kt&&FY3F`2P69a$ zRGbC$RWJ^-!RRO{jC(qcqsfCfx9g?HBn%qZKo{AP^9JtVF1k;)!?Z-Plh2w z4=cEb?xROOTUMAuK3gh#ihuW^80>Gd`y7#zZgS-8fcDGXr~0?CUuY@2)q)g!G&>!k z!Yg?%_0M)0?VKqQ&{Ws#iEA*3`l3upZP_p)x+1A;uI00gWESLxSdJ>2 + +a = 1 / 0 + +except ZeroDivisionError : + +a = 1 + +try : + +try : + +a = 2 / 0 + +except ( ZeroDivisionError , AssertionError ) as v : + +print ( v ) + +except Exception : + +print ( 'there was an exception' ) + + +finally : + +b = 0 +a = 2 diff --git a/tests/tokenized/reraise.txt b/tests/tokenized/reraise.txt new file mode 100644 index 000000000..cd513ea74 --- /dev/null +++ b/tests/tokenized/reraise.txt @@ -0,0 +1,31 @@ +def intricate_try_except ( ) : + +try : + +try : + +a = 1 + +except AssertionError as a : + +print ( a ) + +except TypeError : + +raise + + +except Exception as e : + +raise e + + +def reraise_example ( ) : + +try : + +1 / 0 + +except ZeroDivisionError : + +raise From bd288c8d9b849d3e82e4fafea51ce52403fb8522 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elo=C3=AFse=20Brocas?= Date: Fri, 6 Mar 2026 10:12:47 +0100 Subject: [PATCH 11/12] list_extend opcode: support calls of function inside list extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Eloïse Brocas --- ASTree.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/ASTree.cpp b/ASTree.cpp index 278702cc4..d058eafb3 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -1570,7 +1570,8 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) PycRef lhs = stack.top().cast(); stack.pop(); - if (rhs.type() == ASTNode::NODE_OBJECT) { + switch(rhs.type()){ + case ASTNode::NODE_OBJECT: { // I've only ever seen this be a SMALL_TUPLE, but let's be careful... PycRef obj = rhs.cast()->object(); @@ -1585,8 +1586,12 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } stack.push(new ASTList(result)); - } - else if (rhs.type() == ASTNode::NODE_NAME) { + } + break; + case ASTNode::NODE_NAME: + case ASTNode::NODE_CALL: + case ASTNode::NODE_BINARY: + case ASTNode::NODE_SUBSCR: { ASTList::value_t result = lhs->values(); // rhs is a variable, so to extend the list @@ -1596,9 +1601,11 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) result.push_back(unpacked_ref); stack.push(new ASTList(result)); - } - else { + } + break; + default: fprintf(stderr, "Unsupported argument %i found for LIST_EXTEND\n", rhs.type()); + break; } } break; From 81082bb2e8e50d198f7b93062f8be0658332fa9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elo=C3=AFse=20Brocas?= Date: Fri, 6 Mar 2026 10:18:11 +0100 Subject: [PATCH 12/12] decompilation: support unpacking for all node types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Eloïse Brocas --- ASTree.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/ASTree.cpp b/ASTree.cpp index d058eafb3..97abc6650 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -3244,6 +3244,9 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) } node_seen.insert((ASTNode *)node); + if (node.isUnpacked()) + pyc_output << "*"; + switch (node->type()) { case ASTNode::NODE_BINARY: case ASTNode::NODE_COMPARE: @@ -3356,10 +3359,6 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) break; case ASTNode::NODE_LIST: { - if (node.isUnpacked()) { - pyc_output << "*"; - } - pyc_output << "["; bool first = true; cur_indent++; @@ -3473,9 +3472,6 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) } break; case ASTNode::NODE_NAME: - if (node.isUnpacked()) { - pyc_output << "*"; - } pyc_output << node.cast()->name()->value(); break; case ASTNode::NODE_NODELIST: @@ -3896,9 +3892,7 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) { PycRef tuple = node.cast(); ASTTuple::value_t values = tuple->values(); - if (tuple.isUnpacked()) - pyc_output << "*("; - else if (tuple->requireParens()) + if (tuple->requireParens() or tuple.isUnpacked()) pyc_output << "("; bool first = true; for (const auto& val : values) {