From 8c9e56046c21554302148608a93cd3ac60cd1bdc Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Sun, 13 Jul 2025 21:40:59 +0530 Subject: [PATCH 1/2] Support new opcodes --- ASTNode.h | 31 ++++- ASTree.cpp | 125 ++++++++++++++++-- FastStack.h | 19 +++ pyc_object.h | 27 +++- ...t_extend.3.9.pyc => list_extend_1.3.9.pyc} | Bin tests/compiled/list_extend_2.3.12.pyc | Bin 0 -> 215 bytes tests/compiled/test_unpack.3.12.pyc | Bin 0 -> 410 bytes .../{list_extend.py => list_extend_1.py} | 0 tests/input/list_extend_2.py | 2 + tests/input/test_unpack.py | 6 + .../{list_extend.txt => list_extend_1.txt} | 0 tests/tokenized/list_extend_2.txt | 3 + tests/tokenized/test_unpack.txt | 6 + 13 files changed, 197 insertions(+), 22 deletions(-) rename tests/compiled/{list_extend.3.9.pyc => list_extend_1.3.9.pyc} (100%) create mode 100644 tests/compiled/list_extend_2.3.12.pyc create mode 100644 tests/compiled/test_unpack.3.12.pyc rename tests/input/{list_extend.py => list_extend_1.py} (100%) create mode 100644 tests/input/list_extend_2.py create mode 100644 tests/input/test_unpack.py rename tests/tokenized/{list_extend.txt => list_extend_1.txt} (100%) create mode 100644 tests/tokenized/list_extend_2.txt create mode 100644 tests/tokenized/test_unpack.txt diff --git a/ASTNode.h b/ASTNode.h index 98760dbf5..1b2e643e7 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -18,13 +18,13 @@ class ASTNode { NODE_COMPREHENSION, NODE_LOADBUILDCLASS, NODE_AWAITABLE, NODE_FORMATTEDVALUE, NODE_JOINEDSTR, NODE_CONST_MAP, NODE_ANNOTATED_VAR, NODE_CHAINSTORE, NODE_TERNARY, - NODE_KW_NAMES_MAP, + NODE_KW_NAMES_MAP, NODE_CALL_INTRINSIC_1, NODE_CALL_INTRINSIC_2, // Empty node types NODE_LOCALS, }; - ASTNode(int type = NODE_INVALID) : m_refs(), m_type(type), m_processed() { } + ASTNode(int type = NODE_INVALID, bool unpacked = false) : m_refs(), m_type(type), m_processed(), m_unpacked(unpacked) { } virtual ~ASTNode() { } int type() const { return internalGetType(this); } @@ -32,10 +32,15 @@ class ASTNode { bool processed() const { return m_processed; } void setProcessed() { m_processed = true; } + bool unpacked() const { return m_unpacked; } + void setUnpacked() { m_unpacked = true; } + private: int m_refs; int m_type; bool m_processed; + // unpack this node into constituent values + bool m_unpacked; // Hack to make clang happy :( static int internalGetType(const ASTNode *node) @@ -757,4 +762,26 @@ class ASTTernary : public ASTNode PycRef m_else_expr; }; +class ASTCallIntrinsic1: public ASTNode +{ +public: + enum Function { + INTRINSIC_1_INVALID, INTRINSIC_PRINT, INTRINSIC_IMPORT_STAR, + INTRINSIC_STOPITERATION_ERROR, INTRINSIC_ASYNC_GEN_WRAP, + INTRINSIC_UNARY_POSITIVE, INTRINSIC_LIST_TO_TUPLE, INTRINSIC_TYPEVAR, + INTRINSIC_PARAMSPEC, INTRINSIC_TYPEVARTUPLE, + INTRINSIC_SUBSCRIPT_GENERIC, INTRINSIC_TYPEALIAS, + }; +}; + +class ASTCallIntrinsic2: public ASTNode +{ +public: + enum Function { + INTRINSIC_2_INVALID, INTRINSIC_PREP_RERAISE_STAR, + INTRINSIC_TYPEVAR_WITH_BOUND, INTRINSIC_TYPEVAR_WITH_CONSTRAINTS, + INTRINSIC_SET_FUNCTION_TYPE_PARAMS, INTRINSIC_SET_TYPEPARAM_DEFAULT, + }; +}; + #endif diff --git a/ASTree.cpp b/ASTree.cpp index 354292151..472d04875 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -5,6 +5,7 @@ #include "FastStack.h" #include "pyc_numeric.h" #include "bytecode.h" +#include // This must be a triple quote (''' or """), to handle interpolated string literals containing the opposite quote style. // E.g. f'''{"interpolated "123' literal"}''' -> valid. @@ -1443,29 +1444,46 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) break; case Pyc::LIST_EXTEND_A: { + if (operand != 1) { + fprintf(stderr, "LIST_EXTEND operand list is not at the top of the stack\n"); + break; + } + PycRef rhs = stack.top(); stack.pop(); PycRef lhs = stack.top().cast(); stack.pop(); - if (rhs.type() != ASTNode::NODE_OBJECT) { - fprintf(stderr, "Unsupported argument found for LIST_EXTEND\n"); - break; - } + if (rhs.type() == ASTNode::NODE_OBJECT) { - // I've only ever seen this be a SMALL_TUPLE, but let's be careful... - PycRef obj = rhs.cast()->object(); - if (obj->type() != PycObject::TYPE_TUPLE && obj->type() != PycObject::TYPE_SMALL_TUPLE) { - fprintf(stderr, "Unsupported argument type found for LIST_EXTEND\n"); - break; - } + // I've only ever seen this be a SMALL_TUPLE, but let's be careful... + PycRef obj = rhs.cast()->object(); + if (obj->type() != PycObject::TYPE_TUPLE && obj->type() != PycObject::TYPE_SMALL_TUPLE) { + fprintf(stderr, "Unsupported argument type found for LIST_EXTEND\n"); + break; + } - ASTList::value_t result = lhs->values(); - for (const auto& it : obj.cast()->values()) { - result.push_back(new ASTObject(it)); + ASTList::value_t result = lhs->values(); + for (const auto& it : obj.cast()->values()) { + result.push_back(new ASTObject(it)); + } + + stack.push(new ASTList(result)); } + else if (rhs.type() == ASTNode::NODE_NAME) { + ASTList::value_t result = lhs->values(); + + // rhs is a variable, so to extend the list + // we need to unpack rhs + PycRef unpacked_ref = rhs; + unpacked_ref.setUnpacked(); - stack.push(new ASTList(result)); + result.push_back(unpacked_ref); + stack.push(new ASTList(result)); + } + else { + fprintf(stderr, "Unsupported argument %i found for LIST_EXTEND\n", rhs.type()); + } } break; case Pyc::LOAD_ATTR_A: @@ -1515,6 +1533,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack.push(new ASTName(code->getCellVar(mod, operand))); break; case Pyc::LOAD_FAST_A: + case Pyc::LOAD_FAST_CHECK_A: if (mod->verCompare(1, 3) < 0) stack.push(new ASTName(code->getName(operand))); else @@ -2577,6 +2596,76 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack.push(value); } break; + case Pyc::CALL_INTRINSIC_1_A: + { + PycRef arg = stack.top(); + stack.pop(); + + if (operand != ASTCallIntrinsic1::INTRINSIC_LIST_TO_TUPLE) { + fprintf(stderr, "Unimplemented function %i", operand); + break; + } + + if (arg.type() != ASTNode::NODE_LIST) { + fprintf(stderr, "Unexpected argument type %i\n", arg.type()); + break; + } + + PycRef list = arg.cast(); + ASTTuple::value_t values; + for (PycRef val : list->values()) { + values.push_back(val); + } + stack.push(new ASTTuple(values)); + } + break; + case Pyc::CALL_FUNCTION_EX_A: + { + int has_kwmap = operand & 1; + ASTCall::kwparam_t kwparamList; + ASTCall::pparam_t pparamList; + + // callable, iterable object & kwmap object (if present) + + if (has_kwmap) { + PycRef object_or_map = stack.top(); + if (object_or_map.type() == ASTNode::NODE_KW_NAMES_MAP) { + stack.pop(); + PycRef kwparams_map = object_or_map.cast(); + for (ASTKwNamesMap::map_t::const_iterator it = kwparams_map->values().begin(); it != kwparams_map->values().end(); it++) { + kwparamList.push_front(std::make_pair(it->first, it->second)); + } + } + else { + fprintf(stderr, "Unexpected object type %i\n", object_or_map.type()); + } + } + + PycRef iterable = stack.top(); + stack.pop(); + + if (iterable.type() == ASTNode::NODE_LIST) { + PycRef list = iterable.cast(); + for (PycRef n: list->values()) { + pparamList.push_back(n); + } + } + else if (iterable.type() == ASTNode::NODE_TUPLE) { + PycRef tuple = iterable.cast(); + for (PycRef n: tuple->values()) { + pparamList.push_back(n); + } + } + else { + fprintf(stderr, "Unsupported iterable type %i\n", iterable.type()); + } + + PycRef func = stack.top(); + stack.pop(); + + stack.push(new ASTCall(func, pparamList, kwparamList)); + } + break; default: fprintf(stderr, "Unsupported opcode: %s (%d)\n", Pyc::OpcodeName(opcode), opcode); cleanBuild = false; @@ -2773,6 +2862,7 @@ void print_formatted_value(PycRef formatted_value, PycModule* pyc_output << "}"; } +// TODO: Handle m_unpack for node correctly here. void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) { if (node == NULL) { @@ -2891,6 +2981,10 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) break; case ASTNode::NODE_LIST: { + if (node.isUnpacked()) { + pyc_output << "*"; + } + pyc_output << "["; bool first = true; cur_indent++; @@ -2984,6 +3078,9 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) } break; case ASTNode::NODE_NAME: + if (node.isUnpacked()) { + pyc_output << "*"; + } pyc_output << node.cast()->name()->value(); break; case ASTNode::NODE_NODELIST: diff --git a/FastStack.h b/FastStack.h index b91ec71de..624cf0a98 100644 --- a/FastStack.h +++ b/FastStack.h @@ -61,6 +61,25 @@ class FastStack { return m_ptr == -1; } + void debug_print(PycModule* mod, std::ostream& pyc_output) + { + pyc_output << "---- STACK CONTENTS ----\n"; + if (empty()) { + pyc_output << "empty stack\n"; + } + else { + for (int i = m_ptr; i >= 0; i--) { + print_src(m_stack[i], mod, pyc_output); + if (i == m_ptr) { + pyc_output << " <- STACK TOP"; + } + pyc_output << "\n"; + } + } + pyc_output << "------------------------\n"; + } + + private: std::vector> m_stack; int m_ptr; diff --git a/pyc_object.h b/pyc_object.h index 085944496..00140ec3b 100644 --- a/pyc_object.h +++ b/pyc_object.h @@ -6,21 +6,21 @@ template class PycRef { public: - PycRef() noexcept : m_obj() { } + PycRef() noexcept : m_obj(), m_unpack(false) { } - PycRef(_Obj* obj) noexcept : m_obj(obj) + PycRef(_Obj* obj) noexcept : m_obj(obj), m_unpack(false) { if (m_obj) m_obj->addRef(); } - PycRef(const PycRef<_Obj>& obj) noexcept : m_obj(obj.m_obj) + PycRef(const PycRef<_Obj>& obj) noexcept : m_obj(obj.m_obj), m_unpack(obj.m_unpack) { if (m_obj) m_obj->addRef(); } - PycRef(PycRef<_Obj>&& obj) noexcept : m_obj(obj.m_obj) + PycRef(PycRef<_Obj>&& obj) noexcept : m_obj(obj.m_obj), m_unpack(obj.m_unpack) { obj.m_obj = nullptr; } @@ -31,6 +31,8 @@ class PycRef { m_obj->delRef(); } + // Most operators should deal with m_unpack but we leave it as is for now + PycRef<_Obj>& operator=(_Obj* obj) { if (obj) @@ -75,16 +77,29 @@ class PycRef { template PycRef<_Cast> cast() const { - _Cast* result = dynamic_cast<_Cast*>(m_obj); - if (!result) + _Cast* casted_obj = dynamic_cast<_Cast*>(m_obj); + if (!casted_obj) throw std::bad_cast(); + + PycRef<_Cast> result = casted_obj; + if (m_unpack) { + result.setUnpacked(); + } return result; } bool isIdent(const _Obj* obj) const { return m_obj == obj; } + bool isUnpacked() const { return m_unpack; } + void setUnpacked() { m_unpack = true; } + private: _Obj* m_obj; + + // References to an object can be either packed or unpacked + // Usually unpacked references will be used with variables but + // they may arise in other places as well. + bool m_unpack; }; diff --git a/tests/compiled/list_extend.3.9.pyc b/tests/compiled/list_extend_1.3.9.pyc similarity index 100% rename from tests/compiled/list_extend.3.9.pyc rename to tests/compiled/list_extend_1.3.9.pyc diff --git a/tests/compiled/list_extend_2.3.12.pyc b/tests/compiled/list_extend_2.3.12.pyc new file mode 100644 index 0000000000000000000000000000000000000000..25c333a8e39c9a975e05135b1c8c92f7d78494c0 GIT binary patch literal 215 zcmX@j%ge<81iE2`88SfnF^B^Lj8MjBkdo;PDGV(PQ4E!gnoP+s8IS@dAZ7+)sH${^ z8iqxT!3>&=ek*~*EykQ;kZOfrT*ZmG1v#lkdIgoYnA1~Bir9d{AgN+*Akn~ZgPXr2 zu*0Om`!kTs;HSw5R!{^ox`-V_umXu&95%W6DWy57c10XOE?7Irbsv}+85ut_F)|8( G)dBzKGX+88um7f;fK3P%*F!JCI}s;?F)nVmd6D7+|JX z0mZ6-!s(1P42u}ovh^_pGiWmV-D0%4#ptQYe2cBPq^LBxFJlG7MB$3XXX`@mVnvurFkH=dIgoYSjtQClZv>3Ni{zClt>xo>RQQaCX^DtBG|NxV2U=UgFlez@qgTsEXkw4~PKi(qt-P15#j9Rx%WU z#EU>-dyB&+H$SB`C)KV<0LTTor&tO|d|+l|WW3G5`-z2-QTrns10!ogQioteSSt@$ FF97PBQvUz| literal 0 HcmV?d00001 diff --git a/tests/input/list_extend.py b/tests/input/list_extend_1.py similarity index 100% rename from tests/input/list_extend.py rename to tests/input/list_extend_1.py diff --git a/tests/input/list_extend_2.py b/tests/input/list_extend_2.py new file mode 100644 index 000000000..1473f17de --- /dev/null +++ b/tests/input/list_extend_2.py @@ -0,0 +1,2 @@ +def get(l): + return [*l] diff --git a/tests/input/test_unpack.py b/tests/input/test_unpack.py new file mode 100644 index 000000000..fdf2ebf9e --- /dev/null +++ b/tests/input/test_unpack.py @@ -0,0 +1,6 @@ +import struct + +def wtob(w): + return struct.pack('<'+'I'*len(w), *w) + +wtob([12,3]) diff --git a/tests/tokenized/list_extend.txt b/tests/tokenized/list_extend_1.txt similarity index 100% rename from tests/tokenized/list_extend.txt rename to tests/tokenized/list_extend_1.txt diff --git a/tests/tokenized/list_extend_2.txt b/tests/tokenized/list_extend_2.txt new file mode 100644 index 000000000..31a437e0c --- /dev/null +++ b/tests/tokenized/list_extend_2.txt @@ -0,0 +1,3 @@ +def get ( l ) : + +return [ * l ] diff --git a/tests/tokenized/test_unpack.txt b/tests/tokenized/test_unpack.txt new file mode 100644 index 000000000..16c7af1f4 --- /dev/null +++ b/tests/tokenized/test_unpack.txt @@ -0,0 +1,6 @@ +import struct +def wtob ( w ) : + +return struct . pack ( '<' + 'I' * len ( w ) , * w ) + +wtob ( [ 12 , 3 ] ) From 6568907236cc51be705cf824d8743d5d4a2837f2 Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Mon, 14 Jul 2025 11:51:40 +0530 Subject: [PATCH 2/2] Resolve comments on PR --- ASTNode.h | 7 +------ ASTree.cpp | 3 ++- pyc_object.h | 20 +++++++++++--------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/ASTNode.h b/ASTNode.h index 1b2e643e7..0f7ebc28b 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -24,7 +24,7 @@ class ASTNode { NODE_LOCALS, }; - ASTNode(int type = NODE_INVALID, bool unpacked = false) : m_refs(), m_type(type), m_processed(), m_unpacked(unpacked) { } + ASTNode(int type = NODE_INVALID) : m_refs(), m_type(type), m_processed() { } virtual ~ASTNode() { } int type() const { return internalGetType(this); } @@ -32,15 +32,10 @@ class ASTNode { bool processed() const { return m_processed; } void setProcessed() { m_processed = true; } - bool unpacked() const { return m_unpacked; } - void setUnpacked() { m_unpacked = true; } - private: int m_refs; int m_type; bool m_processed; - // unpack this node into constituent values - bool m_unpacked; // Hack to make clang happy :( static int internalGetType(const ASTNode *node) diff --git a/ASTree.cpp b/ASTree.cpp index 472d04875..a0946c683 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -1476,7 +1476,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) // rhs is a variable, so to extend the list // we need to unpack rhs PycRef unpacked_ref = rhs; - unpacked_ref.setUnpacked(); + unpacked_ref.setUnpacked(true); result.push_back(unpacked_ref); stack.push(new ASTList(result)); @@ -2644,6 +2644,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) PycRef iterable = stack.top(); stack.pop(); + // Not sure how to combine these two conditions if (iterable.type() == ASTNode::NODE_LIST) { PycRef list = iterable.cast(); for (PycRef n: list->values()) { diff --git a/pyc_object.h b/pyc_object.h index 00140ec3b..56baad842 100644 --- a/pyc_object.h +++ b/pyc_object.h @@ -31,8 +31,6 @@ class PycRef { m_obj->delRef(); } - // Most operators should deal with m_unpack but we leave it as is for now - PycRef<_Obj>& operator=(_Obj* obj) { if (obj) @@ -40,6 +38,7 @@ class PycRef { if (m_obj) m_obj->delRef(); m_obj = obj; + m_unpack = false; return *this; } @@ -50,16 +49,20 @@ class PycRef { if (m_obj) m_obj->delRef(); m_obj = obj.m_obj; + m_unpack = obj.m_unpack; return *this; } PycRef<_Obj>& operator=(PycRef<_Obj>&& obj) noexcept { m_obj = obj.m_obj; + m_unpack = obj.m_unpack; obj.m_obj = nullptr; + obj.m_unpack = false; return *this; } + // TODO: Handle m_unpack for remaining operators bool operator==(_Obj* obj) const { return m_obj == obj; } bool operator==(const PycRef<_Obj>& obj) const { return m_obj == obj.m_obj; } bool operator!=(_Obj* obj) const { return m_obj != obj; } @@ -82,23 +85,22 @@ class PycRef { throw std::bad_cast(); PycRef<_Cast> result = casted_obj; - if (m_unpack) { - result.setUnpacked(); - } + result.setUnpacked(m_unpack); + return result; } bool isIdent(const _Obj* obj) const { return m_obj == obj; } bool isUnpacked() const { return m_unpack; } - void setUnpacked() { m_unpack = true; } + void setUnpacked(bool unpack) { m_unpack = unpack; } private: _Obj* m_obj; - // References to an object can be either packed or unpacked - // Usually unpacked references will be used with variables but - // they may arise in other places as well. + // References to an object can be either packed or unpacked. + // Usually unpacked references will be used with variables + // or lists but they may arise in other places as well. bool m_unpack; };