diff --git a/ASTree.cpp b/ASTree.cpp index 62bb4eb49..85ebfe8e4 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -88,8 +88,23 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) bool variable_annotations = false; while (!source.atEof()) { -#if defined(BLOCK_DEBUG) || defined(STACK_DEBUG) + curpos = pos; + bc_next(source, mod, opcode, operand, pos); + +#if defined(BLOCK_DEBUG) || defined(STACK_DEBUG) || defined(ASM_DEBUG) fprintf(stderr, "%-7d", pos); + #ifdef ASM_DEBUG + { + const int asm_column_width = 40; + std::string s = bc_instruction_to_string(code, mod, pos, opcode, operand).c_str(); + if (s.size() > asm_column_width) { + // fixed-size column + s = s.substr(0, asm_column_width - 3) + "..."; + } + s.insert(s.end(), asm_column_width - s.size(), ' '); + fputs(s.c_str(), stderr); + } + #endif #ifdef STACK_DEBUG fprintf(stderr, "%-5d", (unsigned int)stack_hist.size() + 1); #endif @@ -101,9 +116,6 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) fprintf(stderr, "\n"); #endif - curpos = pos; - bc_next(source, mod, opcode, operand, pos); - if (need_try && opcode != Pyc::SETUP_EXCEPT_A) { need_try = false; @@ -2763,9 +2775,12 @@ void print_src(PycRef node, PycModule* mod) print_formatted_value(val.cast(), mod); break; case ASTNode::NODE_OBJECT: - // When printing a piece of the f-string, keep the quote style consistent. - // This avoids problems when ''' or """ is part of the string. - print_const(val.cast()->object(), mod, F_STRING_QUOTE); + { + // When printing a piece of the f-string, keep the quote style consistent. + // This avoids problems when ''' or """ is part of the string. + std::string s = const_to_string(val.cast()->object(), mod, F_STRING_QUOTE); + fputs(s.c_str(), pyc_output); + } break; default: fprintf(stderr, "Unsupported node type %d in NODE_JOINEDSTR\n", val.type()); @@ -2925,7 +2940,8 @@ void print_src(PycRef node, PycModule* mod) PycRef code = obj.cast(); decompyle(code, mod); } else { - print_const(obj, mod); + std::string s = const_to_string(obj, mod); + fputs(s.c_str(), pyc_output); } } break; @@ -3343,8 +3359,8 @@ bool print_docstring(PycRef obj, int indent, PycModule* mod) } if (prefix != -1) { start_line(indent); - OutputString(obj.cast(), prefix, true); - fputs("\n", pyc_output); + std::string s = OutputString(obj.cast(), prefix, true) + "\n"; + fputs(s.c_str(), pyc_output); return true; } else return false; diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a3b979b0..e5e47d05a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Debug options. option(ENABLE_BLOCK_DEBUG "Enable block debugging" OFF) option(ENABLE_STACK_DEBUG "Enable stack debugging" OFF) +option(ENABLE_ASM_DEBUG "Enable assembly debugging" OFF) # Turn debug defs on if they're enabled. if (ENABLE_BLOCK_DEBUG) @@ -15,6 +16,9 @@ endif() if (ENABLE_STACK_DEBUG) add_definitions(-DSTACK_DEBUG) endif() +if (ENABLE_ASM_DEBUG) + add_definitions(-DASM_DEBUG) +endif() # For generating the bytes tables find_package(PythonInterp REQUIRED) diff --git a/bytecode.cpp b/bytecode.cpp index 12ed09868..1030edc07 100644 --- a/bytecode.cpp +++ b/bytecode.cpp @@ -2,6 +2,9 @@ #include "bytecode.h" #include #include +#include +#include +#include #ifdef _MSC_VER #define snprintf _snprintf @@ -156,21 +159,22 @@ bool Pyc::IsCompareArg(int opcode) return (opcode == Pyc::COMPARE_OP_A); } -void print_const(PycRef obj, PycModule* mod, const char* parent_f_string_quote) +std::string const_to_string(PycRef obj, PycModule* mod, const char* parent_f_string_quote) { + std::string result; + if (obj == NULL) { - fputs("", pyc_output); - return; + return ""; } switch (obj->type()) { case PycObject::TYPE_STRING: - OutputString(obj.cast(), mod->strIsUnicode() ? 'b' : 0, - false, pyc_output, parent_f_string_quote); + result += OutputString(obj.cast(), mod->strIsUnicode() ? 'b' : 0, + false, parent_f_string_quote); break; case PycObject::TYPE_UNICODE: - OutputString(obj.cast(), mod->strIsUnicode() ? 0 : 'u', - false, pyc_output, parent_f_string_quote); + result += OutputString(obj.cast(), mod->strIsUnicode() ? 0 : 'u', + false, parent_f_string_quote); break; case PycObject::TYPE_STRINGREF: case PycObject::TYPE_INTERNED: @@ -179,105 +183,105 @@ void print_const(PycRef obj, PycModule* mod, const char* parent_f_str case PycObject::TYPE_SHORT_ASCII: case PycObject::TYPE_SHORT_ASCII_INTERNED: if (mod->majorVer() >= 3) - OutputString(obj.cast(), 0, false, pyc_output, parent_f_string_quote); + result += OutputString(obj.cast(), 0, false, parent_f_string_quote); else - OutputString(obj.cast(), mod->strIsUnicode() ? 'b' : 0, - false, pyc_output, parent_f_string_quote); + result += OutputString(obj.cast(), mod->strIsUnicode() ? 'b' : 0, + false, parent_f_string_quote); break; case PycObject::TYPE_TUPLE: case PycObject::TYPE_SMALL_TUPLE: { - fputs("(", pyc_output); + result = "("; PycTuple::value_t values = obj.cast()->values(); auto it = values.cbegin(); if (it != values.cend()) { - print_const(*it, mod); + result += const_to_string(*it, mod); while (++it != values.cend()) { - fputs(", ", pyc_output); - print_const(*it, mod); + result += ", "; + result += const_to_string(*it, mod); } } if (values.size() == 1) - fputs(",)", pyc_output); + result += ",)"; else - fputs(")", pyc_output); + result += ")"; } break; case PycObject::TYPE_LIST: { - fputs("[", pyc_output); + result += "["; PycList::value_t values = obj.cast()->values(); auto it = values.cbegin(); if (it != values.cend()) { - print_const(*it, mod); + result += const_to_string(*it, mod); while (++it != values.cend()) { - fputs(", ", pyc_output); - print_const(*it, mod); + result += ", "; + result += const_to_string(*it, mod); } } - fputs("]", pyc_output); + result += "]"; } break; case PycObject::TYPE_DICT: { - fputs("{", pyc_output); + result += "{"; PycDict::key_t keys = obj.cast()->keys(); PycDict::value_t values = obj.cast()->values(); auto ki = keys.cbegin(); auto vi = values.cbegin(); if (ki != keys.cend()) { - print_const(*ki, mod); - fputs(": ", pyc_output); - print_const(*vi, mod); + result += const_to_string(*ki, mod); + result += ": "; + result += const_to_string(*vi, mod); while (++ki != keys.cend()) { ++vi; - fputs(", ", pyc_output); - print_const(*ki, mod); - fputs(": ", pyc_output); - print_const(*vi, mod); + result += ", "; + result += const_to_string(*ki, mod); + result += ": "; + result += const_to_string(*vi, mod); } } - fputs("}", pyc_output); + result += "}"; } break; case PycObject::TYPE_SET: { - fputs("{", pyc_output); + result += "{"; PycSet::value_t values = obj.cast()->values(); auto it = values.cbegin(); if (it != values.cend()) { - print_const(*it, mod); + result += const_to_string(*it, mod); while (++it != values.cend()) { - fputs(", ", pyc_output); - print_const(*it, mod); + result += ", "; + result += const_to_string(*it, mod); } } - fputs("}", pyc_output); + result += "}"; } break; case PycObject::TYPE_NONE: - fputs("None", pyc_output); + result += "None"; break; case PycObject::TYPE_TRUE: - fputs("True", pyc_output); + result += "True"; break; case PycObject::TYPE_FALSE: - fputs("False", pyc_output); + result += "False"; break; case PycObject::TYPE_ELLIPSIS: - fputs("...", pyc_output); + result += "..."; break; case PycObject::TYPE_INT: - fprintf(pyc_output, "%d", obj.cast()->value()); + result += string_format("%d", obj.cast()->value()); break; case PycObject::TYPE_LONG: - fprintf(pyc_output, "%s", obj.cast()->repr().c_str()); + result += string_format("%s", obj.cast()->repr().c_str()); break; case PycObject::TYPE_FLOAT: - fprintf(pyc_output, "%s", obj.cast()->value()); + result += string_format("%s", obj.cast()->value()); break; case PycObject::TYPE_COMPLEX: - fprintf(pyc_output, "(%s+%sj)", obj.cast()->value(), + result += string_format("(%s+%sj)", obj.cast()->value(), obj.cast()->imag()); break; case PycObject::TYPE_BINARY_FLOAT: @@ -287,30 +291,31 @@ void print_const(PycRef obj, PycModule* mod, const char* parent_f_str bool is_negative = std::signbit(value); if (std::isnan(value)) { if (is_negative) { - fprintf(pyc_output, "float('-nan')"); + result += string_format("float('-nan')"); } else { - fprintf(pyc_output, "float('nan')"); + result += string_format("float('nan')"); } } else if (std::isinf(value)) { if (is_negative) { - fprintf(pyc_output, "float('-inf')"); + result += string_format("float('-inf')"); } else { - fprintf(pyc_output, "float('inf')"); + result += string_format("float('inf')"); } } else { - fprintf(pyc_output, "%g", value); + result += string_format("%g", value); } } break; case PycObject::TYPE_BINARY_COMPLEX: - fprintf(pyc_output, "(%g+%gj)", obj.cast()->value(), + result += string_format("(%g+%gj)", obj.cast()->value(), obj.cast()->imag()); break; case PycObject::TYPE_CODE: case PycObject::TYPE_CODE2: - fprintf(pyc_output, " %s", obj.cast()->name()->value()); + result += string_format(" %s", obj.cast()->name()->value()); break; } + return result; } void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos) @@ -343,14 +348,75 @@ void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& } } -void bc_disasm(PycRef code, PycModule* mod, int indent) -{ +std::string bc_instruction_to_string(PycRef code, PycModule* mod, int pos, int opcode, int operand) { static const char *cmp_strings[] = { "<", "<=", "==", "!=", ">", ">=", "in", "not in", "is", "is not", "", "" }; static const size_t cmp_strings_len = sizeof(cmp_strings) / sizeof(cmp_strings[0]); + std::string result = string_format("%-24s", Pyc::OpcodeName(opcode)); + + if (opcode >= Pyc::PYC_HAVE_ARG) { + if (Pyc::IsConstArg(opcode)) { + try { + auto constParam = code->getConst(operand); + result += string_format("%d: ", operand); + result += const_to_string(constParam, mod); + } catch (const std::out_of_range &) { + result += string_format("%d ", operand); + } + } else if (Pyc::IsNameArg(opcode)) { + try { + result += string_format("%d: %s", operand, code->getName(operand)->value()); + } catch (const std::out_of_range &) { + result += string_format("%d ", operand); + } + } else if (Pyc::IsVarNameArg(opcode)) { + try { + result += string_format("%d: %s", operand, code->getVarName(operand)->value()); + } catch (const std::out_of_range &) { + result += string_format("%d ", operand); + } + } else if (Pyc::IsCellArg(opcode)) { + try { + result += string_format("%d: %s", operand, code->getCellVar(operand)->value()); + } catch (const std::out_of_range &) { + result += string_format("%d ", operand); + } + } else if (Pyc::IsJumpOffsetArg(opcode)) { + int offs = operand; + if (mod->verCompare(3, 10) >= 0) + offs *= sizeof(uint16_t); // BPO-27129 + result += string_format("%d (to %d)", operand, pos+offs); + } + else if (Pyc::IsJumpArg(opcode)) { + if (mod->verCompare(3, 10) >= 0) // BPO-27129 + result += string_format("%d (to %d)", operand, int(operand * sizeof(uint16_t))); + else + result += string_format("%d", operand); + } else if (Pyc::IsCompareArg(opcode)) { + if (static_cast(operand) < cmp_strings_len) + result += string_format("%d (%s)", operand, cmp_strings[operand]); + else + result += string_format("%d (UNKNOWN)", operand); + } else if (opcode == Pyc::IS_OP_A) { + result += string_format("%d (%s)", operand, (operand == 0) ? "is" + : (operand == 1) ? "is not" + : "UNKNOWN"); + } else if (opcode == Pyc::CONTAINS_OP_A) { + result += string_format("%d (%s)", operand, (operand == 0) ? "in" + : (operand == 1) ? "not in" + : "UNKNOWN"); + } else { + result += string_format("%d", operand); + } + } + return result; +} + +void bc_disasm(PycRef code, PycModule* mod, int indent) +{ PycBuffer source(code->code()->value(), code->code()->length()); int opcode, operand; @@ -359,65 +425,10 @@ void bc_disasm(PycRef code, PycModule* mod, int indent) for (int i=0; i= Pyc::PYC_HAVE_ARG) { - if (Pyc::IsConstArg(opcode)) { - try { - auto constParam = code->getConst(operand); - fprintf(pyc_output, "%d: ", operand); - print_const(constParam, mod); - } catch (const std::out_of_range &) { - fprintf(pyc_output, "%d ", operand); - } - } else if (Pyc::IsNameArg(opcode)) { - try { - fprintf(pyc_output, "%d: %s", operand, code->getName(operand)->value()); - } catch (const std::out_of_range &) { - fprintf(pyc_output, "%d ", operand); - } - } else if (Pyc::IsVarNameArg(opcode)) { - try { - fprintf(pyc_output, "%d: %s", operand, code->getVarName(operand)->value()); - } catch (const std::out_of_range &) { - fprintf(pyc_output, "%d ", operand); - } - } else if (Pyc::IsCellArg(opcode)) { - try { - fprintf(pyc_output, "%d: %s", operand, code->getCellVar(operand)->value()); - } catch (const std::out_of_range &) { - fprintf(pyc_output, "%d ", operand); - } - } else if (Pyc::IsJumpOffsetArg(opcode)) { - int offs = operand; - if (mod->verCompare(3, 10) >= 0) - offs *= sizeof(uint16_t); // BPO-27129 - fprintf(pyc_output, "%d (to %d)", operand, pos+offs); - } - else if (Pyc::IsJumpArg(opcode)) { - if (mod->verCompare(3, 10) >= 0) // BPO-27129 - fprintf(pyc_output, "%d (to %d)", operand, int(operand * sizeof(uint16_t))); - else - fprintf(pyc_output, "%d", operand); - } else if (Pyc::IsCompareArg(opcode)) { - if (static_cast(operand) < cmp_strings_len) - fprintf(pyc_output, "%d (%s)", operand, cmp_strings[operand]); - else - fprintf(pyc_output, "%d (UNKNOWN)", operand); - } else if (opcode == Pyc::IS_OP_A) { - fprintf(pyc_output, "%d (%s)", operand, (operand == 0) ? "is" - : (operand == 1) ? "is not" - : "UNKNOWN"); - } else if (opcode == Pyc::CONTAINS_OP_A) { - fprintf(pyc_output, "%d (%s)", operand, (operand == 0) ? "in" - : (operand == 1) ? "not in" - : "UNKNOWN"); - } else { - fprintf(pyc_output, "%d", operand); - } - } fputs("\n", pyc_output); } } diff --git a/bytecode.h b/bytecode.h index a9dc150ef..6ae4d8bbb 100644 --- a/bytecode.h +++ b/bytecode.h @@ -30,6 +30,7 @@ bool IsCompareArg(int opcode); } -void print_const(PycRef obj, PycModule* mod, const char* parent_f_string_quote = nullptr); +std::string const_to_string(PycRef obj, PycModule* mod, const char* parent_f_string_quote = nullptr); void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos); +std::string bc_instruction_to_string(PycRef code, PycModule* mod, int pos, int opcode, int operand); void bc_disasm(PycRef code, PycModule* mod, int indent); diff --git a/pyc_string.cpp b/pyc_string.cpp index 0190a16e0..255b0d858 100644 --- a/pyc_string.cpp +++ b/pyc_string.cpp @@ -85,16 +85,17 @@ bool PycString::isEqual(PycRef obj) const return isEqual(strObj->m_value); } -void OutputString(PycRef str, char prefix, bool triple, FILE* F, const char* parent_f_string_quote) +std::string OutputString(PycRef str, char prefix, bool triple, const char* parent_f_string_quote) { + std::string result; if (prefix != 0) - fputc(prefix, F); + result += prefix; const char* ch = str->value(); int len = str->length(); if (ch == 0) { - fputs("''", F); - return; + result += "''"; + return result; } // Determine preferred quote style (Emulate Python's method) @@ -118,51 +119,52 @@ void OutputString(PycRef str, char prefix, bool triple, FILE* F, cons // Output the string if (!parent_f_string_quote) { if (triple) - fputs(useQuotes ? "\"\"\"" : "'''", F); + result += useQuotes ? "\"\"\"" : "'''"; else - fputc(useQuotes ? '"' : '\'', F); + result += useQuotes ? '"' : '\''; } while (len--) { if (*ch < 0x20 || *ch == 0x7F) { if (*ch == '\r') { - fputs("\\r", F); + result += "\\r"; } else if (*ch == '\n') { if (triple) - fputc('\n', F); + result += '\n'; else - fputs("\\n", F); + result += "\\n"; } else if (*ch == '\t') { - fputs("\\t", F); + result += "\\t"; } else { - fprintf(F, "\\x%02x", (*ch & 0xFF)); + result += string_format("\\x%02x", (*ch & 0xFF)); } } else if ((unsigned char)(*ch) >= 0x80) { if (str->type() == PycObject::TYPE_UNICODE) { // Unicode stored as UTF-8... Let the stream interpret it - fputc(*ch, F); + result += *ch; } else { - fprintf(F, "\\x%x", (*ch & 0xFF)); + result += string_format("\\x%x", (*ch & 0xFF)); } } else { if (!useQuotes && *ch == '\'') - fputs("\\'", F); + result += "\\'"; else if (useQuotes && *ch == '"') - fputs("\\\"", F); + result += "\\\""; else if (*ch == '\\') - fputs("\\\\", F); + result += "\\\\"; else if (parent_f_string_quote && *ch == '{') - fputs("{{", F); + result += "{{"; else if (parent_f_string_quote && *ch == '}') - fputs("}}", F); + result += "}}"; else - fputc(*ch, F); + result += *ch; } ch++; } if (!parent_f_string_quote) { if (triple) - fputs(useQuotes ? "\"\"\"" : "'''", F); + result += useQuotes ? "\"\"\"" : "'''"; else - fputc(useQuotes ? '"' : '\'', F); + result += useQuotes ? '"' : '\''; } + return result; } diff --git a/pyc_string.h b/pyc_string.h index 0ad17d688..a5d5bcf10 100644 --- a/pyc_string.h +++ b/pyc_string.h @@ -5,6 +5,8 @@ #include "data.h" #include #include +#include +#include class PycString : public PycObject { public: @@ -31,7 +33,18 @@ class PycString : public PycObject { std::string m_value; }; -void OutputString(PycRef str, char prefix = 0, bool triple = false, - FILE* F = pyc_output, const char* parent_f_string_quote = nullptr); +std::string OutputString(PycRef str, char prefix = 0, bool triple = false, + const char* parent_f_string_quote = nullptr); + +template +std::string string_format( const std::string& format, Args ... args ) +{ + int size_s = std::snprintf( nullptr, 0, format.c_str(), args ... ) + 1; // Extra space for '\0' + if( size_s <= 0 ){ throw std::runtime_error( "Error during formatting." ); } + auto size = static_cast( size_s ); + std::unique_ptr buf( new char[ size ] ); + std::snprintf( buf.get(), size, format.c_str(), args ... ); + return std::string( buf.get(), buf.get() + size - 1 ); // We don't want the '\0' inside +} #endif diff --git a/pycdas.cpp b/pycdas.cpp index 40333b796..cd7f21ff5 100644 --- a/pycdas.cpp +++ b/pycdas.cpp @@ -131,14 +131,18 @@ void output_object(PycRef obj, PycModule* mod, int indent) } break; case PycObject::TYPE_STRING: - iputs(indent, ""); - OutputString(obj.cast(), mod->strIsUnicode() ? 'b' : 0); - fputs("\n", pyc_output); + { + iputs(indent, ""); + std::string s = OutputString(obj.cast(), mod->strIsUnicode() ? 'b' : 0) + "\n"; + fputs(s.c_str(), pyc_output); + } break; case PycObject::TYPE_UNICODE: - iputs(indent, ""); - OutputString(obj.cast(), mod->strIsUnicode() ? 0 : 'u'); - fputs("\n", pyc_output); + { + iputs(indent, ""); + std::string s = OutputString(obj.cast(), mod->strIsUnicode() ? 0 : 'u') + "\n"; + fputs(s.c_str(), pyc_output); + } break; case PycObject::TYPE_STRINGREF: case PycObject::TYPE_INTERNED: @@ -146,12 +150,16 @@ void output_object(PycRef obj, PycModule* mod, int indent) case PycObject::TYPE_ASCII_INTERNED: case PycObject::TYPE_SHORT_ASCII: case PycObject::TYPE_SHORT_ASCII_INTERNED: - iputs(indent, ""); - if (mod->majorVer() >= 3) - OutputString(obj.cast(), 0); - else - OutputString(obj.cast(), mod->strIsUnicode() ? 'b' : 0); - fputs("\n", pyc_output); + { + iputs(indent, ""); + std::string s; + if (mod->majorVer() >= 3) + s = OutputString(obj.cast(), 0); + else + s = OutputString(obj.cast(), mod->strIsUnicode() ? 'b' : 0); + fputs(s.c_str(), pyc_output); + fputs("\n", pyc_output); + } break; case PycObject::TYPE_TUPLE: case PycObject::TYPE_SMALL_TUPLE: