diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index 0d341544..0382db94 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -25,6 +25,7 @@ Blocked features are rejected before execution with exit code 2. - ✅ `ping [-c N] [-W DURATION] [-i DURATION] [-q] [-4|-6] [-h] HOST` — send ICMP echo requests to a network host and report round-trip statistics; `-f` (flood), `-b` (broadcast), `-s` (packet size), `-I` (interface), `-p` (pattern), and `-R` (record route) are blocked; count/wait/interval are clamped to safe ranges with a warning; multicast, unspecified (`0.0.0.0`/`::`), and broadcast addresses (IPv4 last-octet `.255`) are rejected — note: directed broadcasts on non-standard subnets (e.g. `.127` on a `/25`) are not blocked without subnet-mask knowledge - ✅ `ps [-e|-A] [-f] [-p PIDLIST]` — report process status; default shows current-session processes; `-e`/`-A` shows all; `-f` adds UID/PPID/STIME columns; `-p` selects by PID list - ✅ `printf FORMAT [ARGUMENT]...` — format and print data to stdout; supports `%s`, `%b`, `%c`, `%d`, `%i`, `%o`, `%u`, `%x`, `%X`, `%e`, `%E`, `%f`, `%F`, `%g`, `%G`, `%%`; format reuse for excess arguments; `%n` rejected (security risk); `-v` rejected +- ✅ `python [-c CODE] [-h] [SCRIPT | -] [ARG ...]` — execute Python 3 source code using the gpython pure-Go interpreter (no CPython required); `-c CODE` runs inline code; a positional `SCRIPT` argument runs a file (via AllowedPaths sandbox); `-` reads from stdin; security sandbox removes all OS exec/write/spawn functions, replaces `open()` with a read-only AllowedPaths-aware version, and blocks `tempfile`/`glob` modules; source files and stdin are capped at 1 MiB; stdlib includes `math`, `string`, `sys`, `os` (read-only), `binascii`, `json` (dumps only; loads raises ValueError), `collections` (OrderedDict, Counter, deque, defaultdict — defaultdict default_factory raises NotImplementedError if provided); blocked modules: `subprocess`, `socket`, `ctypes`, `tempfile`, `glob`, 
`threading`, `multiprocessing`, `asyncio`, `re` (raises ImportError on import); known limitation: `import os.path` raises ImportError — use `from os import path` instead - ✅ `sed [-n] [-e SCRIPT] [-E|-r] [SCRIPT] [FILE]...` — stream editor for filtering and transforming text; uses RE2 regex engine; `-i`/`-f` rejected; `e`/`w`/`W`/`r`/`R` commands blocked - ✅ `strings [-a] [-n MIN] [-t o|d|x] [-o] [-f] [-s SEP] [FILE]...` — print printable character sequences in files (default min length 4); offsets via `-t`/`-o`; filename prefix via `-f`; custom separator via `-s` - ✅ `tail [-n N|-c N] [-q|-v] [-z] [FILE]...` — output the last part of files (default: last 10 lines); supports `+N` offset mode; `-f`/`--follow` is rejected diff --git a/analysis/symbols_builtins.go b/analysis/symbols_builtins.go index bd063a52..d4756b8d 100644 --- a/analysis/symbols_builtins.go +++ b/analysis/symbols_builtins.go @@ -177,6 +177,115 @@ var builtinPerCommandSymbols = map[string][]string{ "strings.Split", // 🟢 splits a string by separator into a slice; pure function, no I/O. "strings.TrimSpace", // 🟢 removes leading/trailing whitespace; pure function. }, + "python": { + "bufio.NewReader", // 🟢 wraps an io.Reader with buffering for readline support; no write capability. + "bufio.Reader", // 🟢 type reference for buffered reader; no write capability. + "context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects. + "encoding/base64.RawStdEncoding", // 🟢 base64 encoding/decoding without padding; pure function, no I/O. + "encoding/base64.StdEncoding", // 🟢 base64 encoding/decoding of byte data; pure function, no I/O. + "encoding/hex.DecodeString", // 🟢 hex decoding; pure function, no I/O. + "encoding/hex.EncodeToString", // 🟢 hex encoding; pure function, no I/O. + "fmt.Errorf", // 🟢 error formatting; pure function, no I/O. + "fmt.Fprint", // 🟢 writes to a writer; used only for output to stdout/stderr, no file-write capability. 
+ "fmt.Fprintf", // 🟢 formats and writes to a writer; used only for error output to stderr. + "fmt.Fprintln", // 🟢 writes formatted line to a writer; used only for traceback output to stderr. + "fmt.Sprintf", // 🟢 string formatting; pure function, no I/O. + "hash/crc32.IEEETable", // 🟢 precomputed CRC32 lookup table constant; pure constant. + "hash/crc32.Update", // 🟢 incremental CRC32 update; pure function, no I/O. + "io.EOF", // 🟢 sentinel error value for end-of-file; read-only constant, no I/O. + "io.LimitReader", // 🟢 caps source-code reads at 1 MiB and wraps file reads; pure wrapper, no I/O by itself. + "io.ReadAll", // 🟠 reads all bytes from a reader; always bounded by io.LimitReader in this package. + "io.ReadWriteCloser", // 🟢 type reference for sandbox file handle; write mode is blocked at runtime. + "io.Reader", // 🟢 type reference for stdin reader; no write capability. + "io.Writer", // 🟢 type reference for stdout/stderr writers; used only for output, not file writes. + "io/fs.DirEntry", // 🟢 interface type for directory entries returned by ReadDir callback; no I/O by itself. + "io/fs.FileInfo", // 🟢 interface type for file metadata returned by Stat callback; no I/O by itself. + "math.Abs", // 🟢 absolute value; pure function, no I/O. + "math.Acos", // 🟢 arc cosine for Python math module; pure function, no I/O. + "math.Asin", // 🟢 arc sine for Python math module; pure function, no I/O. + "math.Atan", // 🟢 arc tangent for Python math module; pure function, no I/O. + "math.Atan2", // 🟢 two-argument arc tangent for Python math module; pure function, no I/O. + "math.Ceil", // 🟢 ceiling function; pure function, no I/O. + "math.Cos", // 🟢 cosine; pure function, no I/O. + "math.E", // 🟢 Euler's number constant; pure constant. + "math.Exp", // 🟢 exponential; pure function, no I/O. + "math.Floor", // 🟢 floor function; pure function, no I/O. + "math.Hypot", // 🟢 Euclidean norm for Python math.hypot(); pure function, no I/O. 
+ "math.Inf", // 🟢 returns infinity; pure function, no I/O. + "math.IsInf", // 🟢 checks for infinity; pure function, no I/O. + "math.IsNaN", // 🟢 checks for NaN; pure function, no I/O. + "math.Log", // 🟢 natural logarithm; pure function, no I/O. + "math.Log10", // 🟢 base-10 logarithm; pure function, no I/O. + "math.Log2", // 🟢 base-2 logarithm; pure function, no I/O. + "math.MaxInt64", // 🟢 maximum int64 constant; used for bounds checks; pure constant. + "math.Mod", // 🟢 floating-point modulo for Python float %; pure function, no I/O. + "math.NaN", // 🟢 returns NaN; pure function, no I/O. + "math.Pi", // 🟢 pi constant; pure constant. + "math.Pow", // 🟢 power function; pure function, no I/O. + "math.Pow10", // 🟢 power of 10 for float formatting; pure function, no I/O. + "math.RoundToEven", // 🟢 banker's rounding for Python round(); pure function, no I/O. + "math.Sin", // 🟢 sine; pure function, no I/O. + "math.Sqrt", // 🟢 square root; pure function, no I/O. + "math.Tan", // 🟢 tangent; pure function, no I/O. + "math.Trunc", // 🟢 truncate to integer for Python math.trunc(); pure function, no I/O. + "math/big.Float", // 🟢 arbitrary-precision float type; pure in-memory computation, no I/O. + "math/big.Int", // 🟢 arbitrary-precision integer type; pure in-memory computation, no I/O. + "math/big.NewInt", // 🟢 creates arbitrary-precision integer; pure function, no I/O. + "os.DevNull", // 🟢 device null path constant; pure constant. + "os.FileMode", // 🟢 file mode type; used only as argument type in the Open callback signature. + "os.IsNotExist", // 🟢 checks whether an error indicates file-not-found; pure predicate, no I/O. + "os.O_RDONLY", // 🟢 read-only file flag; pure constant. + "path/filepath.Base", // 🟢 returns the last element of a path for os.path.basename(); pure function, no I/O. + "path/filepath.Clean", // 🟢 normalises path before use; pure function, no I/O. + "path/filepath.Dir", // 🟢 returns directory component of path; pure function, no I/O. 
+ "path/filepath.Ext", // 🟢 returns file extension; pure function, no I/O. + "path/filepath.Join", // 🟢 joins path elements; pure function, no I/O. + "path/filepath.ListSeparator", // 🟢 OS path list separator constant; pure constant. + "path/filepath.Separator", // 🟢 OS path separator constant; pure constant. + "strconv.Atoi", // 🟢 string-to-int conversion; pure function, no I/O. + "strconv.FormatFloat", // 🟢 float to string conversion; pure function, no I/O. + "strconv.FormatInt", // 🟢 int to string conversion; pure function, no I/O. + "strconv.ParseFloat", // 🟢 string to float conversion; pure function, no I/O. + "strconv.ParseInt", // 🟢 string to int64 with base; pure function, no I/O. + "strconv.ParseUint", // 🟢 string to uint64 with base; pure function, no I/O. + "strings.Builder", // 🟢 efficient in-memory string builder; pure in-memory buffer, no I/O. + "strings.ContainsAny", // 🟢 checks if string contains any of a set of runes; pure function, no I/O. + "strings.ContainsRune", // 🟢 checks if a rune appears in a string; pure function, no I/O. + "strings.Count", // 🟢 counts non-overlapping instances of a substring; pure function, no I/O. + "strings.Fields", // 🟢 splits a string on whitespace; pure function, no I/O. + "strings.HasPrefix", // 🟢 checks string prefix; pure function, no I/O. + "strings.HasSuffix", // 🟢 checks string suffix; pure function, no I/O. + "strings.Index", // 🟢 finds first occurrence of a substring; pure function, no I/O. + "strings.IndexAny", // 🟢 finds first occurrence of any rune in a string; pure function, no I/O. + "strings.Join", // 🟢 joins strings with a separator for str.join(); pure function, no I/O. + "strings.LastIndex", // 🟢 finds last occurrence of a substring; pure function, no I/O. + "strings.NewReader", // 🟢 creates an in-memory io.Reader from a string; pure function, no I/O. + "strings.Repeat", // 🟢 repeats a string n times; pure function, no I/O. 
+ "strings.Replace", // 🟢 replaces occurrences of a substring; pure function, no I/O. + "strings.ReplaceAll", // 🟢 replaces all occurrences of a substring; pure function, no I/O. + "strings.Split", // 🟢 splits string on a separator; pure function, no I/O. + "strings.SplitN", // 🟢 splits string into at most n substrings; pure function, no I/O. + "strings.Title", // 🟢 title-cases words in a string; pure function, no I/O. + "strings.ToLower", // 🟢 converts string to lowercase; pure function, no I/O. + "strings.ToUpper", // 🟢 converts string to uppercase; pure function, no I/O. + "strings.Trim", // 🟢 trims leading and trailing characters; pure function, no I/O. + "strings.TrimLeft", // 🟢 trims leading characters; pure function, no I/O. + "strings.TrimLeftFunc", // 🟢 trims leading runes matching a predicate; pure function, no I/O. + "strings.TrimRight", // 🟢 trims trailing characters; pure function, no I/O. + "strings.TrimRightFunc", // 🟢 trims trailing runes matching a predicate; pure function, no I/O. + "strings.TrimSpace", // 🟢 removes leading/trailing whitespace; pure function, no I/O. + "strings.TrimSuffix", // 🟢 trims a suffix from a string; pure function, no I/O. + "unicode.IsDigit", // 🟢 checks if a rune is a digit; pure function, no I/O. + "unicode.IsLetter", // 🟢 checks if a rune is a letter; pure function, no I/O. + "unicode.MaxRune", // 🟢 maximum valid Unicode code point constant; pure constant. + "unicode/utf8.DecodeRuneInString", // 🟢 decodes the first rune of a string; pure function, no I/O. + "unicode/utf8.RuneCountInString", // 🟢 counts runes in a string; pure function, no I/O. + "unicode/utf8.RuneLen", // 🟢 returns the number of bytes required to encode a rune; pure function, no I/O. + "unicode/utf8.ValidString", // 🟢 checks if a string is valid UTF-8; pure function, no I/O. + "runtime.GOOS", // 🟢 build-time OS identifier constant; used to set sys.platform; read-only, no exec capability. 
+ "runtime.Stack", // 🟢 reads current goroutine stack header to extract goroutine ID; read-only, no exec capability. + "sync.Map", // 🟢 concurrent-safe map for per-goroutine callObject registration; no I/O, no side effects. + }, "printf": { "context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects. "errors.As", // 🟢 error type assertion; pure function, no I/O. @@ -399,31 +508,45 @@ var builtinPerCommandSymbols = map[string][]string{ } var builtinAllowedSymbols = []string{ - "bufio.NewScanner", // 🟢 line-by-line input reading (e.g. head, cat); no write or exec capability. - "bufio.Scanner", // 🟢 scanner type for buffered input reading; no write or exec capability. - "bufio.SplitFunc", // 🟢 type for custom scanner split functions; pure type, no I/O. - "bytes.Buffer", // 🟢 in-memory buffer to capture command output; no I/O side effects. - "bytes.Equal", // 🟢 compares two byte slices for equality; pure function, no I/O. - "bytes.IndexByte", // 🟢 finds a byte in a byte slice; pure function, no I/O. - "bytes.NewReader", // 🟢 wraps a byte slice as an io.Reader; pure in-memory, no I/O. - "context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects. - "context.WithTimeout", // 🟢 creates a child context with a deadline; no filesystem or network I/O itself. - "errors.As", // 🟢 error type assertion; pure function, no I/O. - "errors.Is", // 🟢 error comparison; pure function, no I/O. - "errors.New", // 🟢 creates a simple error value; pure function, no I/O. - "fmt.Errorf", // 🟢 error formatting; pure function, no I/O. - "fmt.Sprintf", // 🟢 string formatting; pure function, no I/O. + "bufio.NewReader", // 🟢 wraps an io.Reader with buffering for readline support; no write capability. + "bufio.NewScanner", // 🟢 line-by-line input reading (e.g. head, cat); no write or exec capability. + "bufio.Reader", // 🟢 buffered reader type reference; no write capability. 
+ "bufio.Scanner", // 🟢 scanner type for buffered input reading; no write or exec capability. + "bufio.SplitFunc", // 🟢 type for custom scanner split functions; pure type, no I/O. + "bytes.Buffer", // 🟢 in-memory buffer to capture command output; no I/O side effects. + "bytes.Equal", // 🟢 compares two byte slices for equality; pure function, no I/O. + "bytes.IndexByte", // 🟢 finds a byte in a byte slice; pure function, no I/O. + "bytes.NewReader", // 🟢 wraps a byte slice as an io.Reader; pure in-memory, no I/O. + "context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects. + "context.WithTimeout", // 🟢 creates a child context with a deadline; no filesystem or network I/O itself. + "encoding/base64.RawStdEncoding", // 🟢 base64 encoding/decoding without padding; pure function, no I/O. + "encoding/base64.StdEncoding", // 🟢 base64 encoding/decoding of byte data; pure function, no I/O. + "encoding/hex.DecodeString", // 🟢 hex decoding; pure function, no I/O. + "encoding/hex.EncodeToString", // 🟢 hex encoding; pure function, no I/O. + "errors.As", // 🟢 error type assertion; pure function, no I/O. + "errors.Is", // 🟢 error comparison; pure function, no I/O. + "errors.New", // 🟢 creates a simple error value; pure function, no I/O. + "fmt.Errorf", // 🟢 error formatting; pure function, no I/O. + "fmt.Fprint", // 🟢 writes to a writer; used only for output to stdout/stderr, no file-write capability. + "fmt.Fprintf", // 🟢 formats and writes to a writer; used only for error output to stderr. + "fmt.Fprintln", // 🟢 writes formatted line to a writer; used only for traceback output to stderr. + "fmt.Sprintf", // 🟢 string formatting; pure function, no I/O. "github.com/prometheus-community/pro-bing.NewPinger", // 🔴 creates an ICMP pinger by resolving host; network I/O is the explicit purpose of the ping builtin. "github.com/prometheus-community/pro-bing.NoopLogger", // 🟢 no-op logger that discards pro-bing internal messages; no side effects. 
"github.com/prometheus-community/pro-bing.Packet", // 🟢 ICMP packet descriptor struct (received packet data); pure data type, no I/O. "github.com/prometheus-community/pro-bing.Pinger", // 🔴 ICMP pinger struct; network I/O is the explicit purpose of the ping builtin. "github.com/prometheus-community/pro-bing.Statistics", // 🟢 ping round-trip statistics struct; pure data type, no I/O. "golang.org/x/sys/unix.SysctlRaw", // 🟠 macOS: reads kernel socket tables (read-only, no exec, no filesystem). + "hash/crc32.IEEETable", // 🟢 precomputed CRC32 lookup table constant; pure constant. + "hash/crc32.Update", // 🟢 incremental CRC32 update; pure function, no I/O. "io.EOF", // 🟢 sentinel error value; pure constant. + "io.LimitReader", // 🟢 wraps a Reader with a byte-count limit; prevents reading unbounded data; no I/O side effects. "io.MultiReader", // 🟢 combines multiple Readers into one sequential Reader; no I/O side effects. "io.NopCloser", // 🟢 wraps a Reader with a no-op Close; no side effects. + "io.ReadAll", // 🟠 reads all bytes from a Reader; only safe when combined with io.LimitReader to bound allocation. "io.ReadCloser", // 🟢 interface type; no side effects. "io.ReadSeeker", // 🟢 interface type combining Reader and Seeker; no side effects. + "io.ReadWriteCloser", // 🟢 type reference for sandbox file handle; write mode is blocked at runtime. "io.Reader", // 🟢 interface type; no side effects. "io.SeekCurrent", // 🟢 whence constant for Seek(offset, SeekCurrent); pure constant. "io.WriteString", // 🟠 writes a string to a writer; no filesystem access, delegates to Write. @@ -441,16 +564,40 @@ var builtinAllowedSymbols = []string{ "io/fs.ModeSticky", // 🟢 file mode bit constant for sticky bit; pure constant. "io/fs.ModeSymlink", // 🟢 file mode bit constant for symlinks; pure constant. "io/fs.ReadDirFile", // 🟢 read-only directory handle interface; no write capability. + "math.Abs", // 🟢 absolute value; pure function, no I/O. 
+ "math.Acos", // 🟢 arc cosine for Python math module; pure function, no I/O. + "math.Asin", // 🟢 arc sine for Python math module; pure function, no I/O. + "math.Atan", // 🟢 arc tangent for Python math module; pure function, no I/O. + "math.Atan2", // 🟢 two-argument arc tangent for Python math module; pure function, no I/O. "math.Ceil", // 🟢 pure arithmetic; no side effects. + "math.Cos", // 🟢 cosine for Python math module; pure function, no I/O. + "math.E", // 🟢 Euler's number constant; pure constant. + "math.Exp", // 🟢 exponential for Python math module; pure function, no I/O. "math.Floor", // 🟢 pure arithmetic; no side effects. + "math.Hypot", // 🟢 Euclidean norm for Python math.hypot(); pure function, no I/O. "math.Inf", // 🟢 returns positive or negative infinity; pure function, no I/O. "math.IsInf", // 🟢 IEEE 754 infinity check; pure function, no I/O. "math.IsNaN", // 🟢 IEEE 754 NaN check; pure function, no I/O. + "math.Log", // 🟢 natural logarithm for Python math module; pure function, no I/O. + "math.Log10", // 🟢 base-10 logarithm for Python math module; pure function, no I/O. + "math.Log2", // 🟢 base-2 logarithm for Python math module; pure function, no I/O. "math.MaxInt32", // 🟢 integer constant; no side effects. "math.MaxInt64", // 🟢 integer constant; no side effects. "math.MaxUint64", // 🟢 integer constant; no side effects. "math.MinInt64", // 🟢 integer constant; no side effects. + "math.Mod", // 🟢 floating-point modulo for Python float %; pure function, no I/O. "math.NaN", // 🟢 returns IEEE 754 NaN value; pure function, no I/O. + "math.Pi", // 🟢 pi constant; pure constant. + "math.Pow", // 🟢 power function for Python math module; pure function, no I/O. + "math.Pow10", // 🟢 power of 10 for float formatting; pure function, no I/O. + "math.RoundToEven", // 🟢 banker's rounding for Python round(); pure function, no I/O. + "math.Sin", // 🟢 sine for Python math module; pure function, no I/O. 
+ "math.Sqrt", // 🟢 square root for Python math module; pure function, no I/O. + "math.Tan", // 🟢 tangent for Python math module; pure function, no I/O. + "math.Trunc", // 🟢 truncate to integer for Python math.trunc(); pure function, no I/O. + "math/big.Float", // 🟢 arbitrary-precision float type for Python big int arithmetic; pure in-memory computation. + "math/big.Int", // 🟢 arbitrary-precision integer type for Python int arithmetic; pure in-memory computation. + "math/big.NewInt", // 🟢 creates arbitrary-precision integer; pure function, no I/O. "net.DefaultResolver", // 🔴 default system DNS resolver; used for context-aware address lookup; network I/O is the explicit purpose of the ping builtin. "net.FlagBroadcast", // 🟢 interface flag constant: broadcast capability; pure constant, no network connections. "net.IPAddr", // 🟢 resolved IP address struct (IP + Zone); pure data type, no I/O. @@ -465,22 +612,32 @@ var builtinAllowedSymbols = []string{ "net.ParseIP", // 🟢 parses an IP address string into a net.IP; pure function, no I/O. "net.Interface", // 🟢 OS network interface descriptor; read-only struct, no network connections. "net.Interfaces", // 🟠 read-only OS interface enumeration function; no network connections or writes. + "os.DevNull", // 🟢 device null path constant for os.devnull in Python os module; pure constant. "os.FileInfo", // 🟢 file metadata interface returned by Stat; no I/O side effects. + "os.FileMode", // 🟢 file mode type used in sandbox Open callback signature; pure type. "os.IsNotExist", // 🟢 checks if error is "not exist"; pure function, no I/O. "os.O_RDONLY", // 🟢 read-only file flag constant; cannot open files by itself. "os.PathError", // 🟢 error type for filesystem path errors; pure type, no I/O. + "path/filepath.Base", // 🟢 returns the last element of a path for os.path.basename(); pure function, no I/O. + "path/filepath.Clean", // 🟢 normalises path before use; pure function, no I/O. 
"path/filepath.Dir", // 🟢 returns the directory component of a path; pure function, no I/O. + "path/filepath.Ext", // 🟢 returns file extension for os.path.splitext(); pure function, no I/O. "path/filepath.IsAbs", // 🟢 reports whether a path is absolute; pure function, no I/O. + "path/filepath.Join", // 🟢 joins path elements; pure function, no I/O. + "path/filepath.ListSeparator", // 🟢 OS path list separator constant for os.pathsep; pure constant. + "path/filepath.Separator", // 🟢 OS path separator constant for os.sep; pure constant. "path/filepath.ToSlash", // 🟢 converts OS path separators to forward slashes; pure function, no I/O. "regexp.Compile", // 🟢 compiles a regular expression; pure function, no I/O. Uses RE2 engine (linear-time, no backtracking). "regexp.QuoteMeta", // 🟢 escapes all special regex characters in a string; pure function, no I/O. "regexp.Regexp", // 🟢 compiled regular expression type; no I/O side effects. All matching methods are linear-time (RE2). "runtime.GOOS", // 🟢 current OS name constant; pure constant, no I/O. + "runtime.Stack", // 🟢 reads current goroutine stack header; read-only, no exec capability. "slices.Reverse", // 🟢 reverses a slice in-place; pure function, no I/O. "slices.SortFunc", // 🟢 sorts a slice with a comparison function; pure function, no I/O. "slices.SortStableFunc", // 🟢 stable sort with a comparison function; pure function, no I/O. "strconv.Atoi", // 🟢 string-to-int conversion; pure function, no I/O. "strconv.ErrRange", // 🟢 sentinel error value for overflow; pure constant. + "strconv.FormatFloat", // 🟢 float-to-string conversion for Python repr/str; pure function, no I/O. "strconv.FormatInt", // 🟢 int-to-string conversion; pure function, no I/O. "strconv.FormatUint", // 🟢 uint-to-string conversion; pure function, no I/O. "strconv.IntSize", // 🟢 platform int size constant (32 or 64); pure constant, no I/O. 
@@ -492,15 +649,34 @@ var builtinAllowedSymbols = []string{ "strconv.ParseUint", // 🟢 string-to-unsigned-int conversion; pure function, no I/O. "strings.Builder", // 🟢 efficient string concatenation; pure in-memory buffer, no I/O. "strings.Contains", // 🟢 substring search; pure function, no I/O. + "strings.ContainsAny", // 🟢 checks if string contains any of a set of runes; pure function, no I/O. "strings.ContainsRune", // 🟢 checks if a rune is in a string; pure function, no I/O. + "strings.Count", // 🟢 counts non-overlapping instances of a substring; pure function, no I/O. "strings.Fields", // 🟢 splits a string on whitespace into a slice; pure function, no I/O. "strings.HasPrefix", // 🟢 pure function for prefix matching; no I/O. + "strings.HasSuffix", // 🟢 checks string suffix; pure function, no I/O. + "strings.Index", // 🟢 finds first occurrence of a substring; pure function, no I/O. + "strings.IndexAny", // 🟢 finds first occurrence of any rune in a string; pure function, no I/O. "strings.IndexByte", // 🟢 finds byte in string; pure function, no I/O. "strings.Join", // 🟢 concatenates a slice of strings with a separator; pure function, no I/O. + "strings.LastIndex", // 🟢 finds last occurrence of a substring; pure function, no I/O. + "strings.NewReader", // 🟢 creates an in-memory io.Reader from a string; pure function, no I/O. + "strings.Repeat", // 🟢 repeats a string n times for str*n operator; pure function, no I/O. + "strings.Replace", // 🟢 replaces occurrences of a substring; pure function, no I/O. "strings.ReplaceAll", // 🟢 replaces all occurrences of a substring; pure function, no I/O. "strings.Split", // 🟢 splits a string by separator into a slice; pure function, no I/O. + "strings.SplitN", // 🟢 splits string into at most n substrings; pure function, no I/O. + "strings.Title", // 🟢 title-cases words in a string; pure function, no I/O. "strings.ToLower", // 🟢 converts string to lowercase; pure function, no I/O. 
+ "strings.ToUpper", // 🟢 converts string to uppercase; pure function, no I/O. + "strings.Trim", // 🟢 trims leading and trailing characters; pure function, no I/O. + "strings.TrimLeft", // 🟢 trims leading characters; pure function, no I/O. + "strings.TrimLeftFunc", // 🟢 trims leading runes matching a predicate; pure function, no I/O. + "strings.TrimRight", // 🟢 trims trailing characters; pure function, no I/O. + "strings.TrimRightFunc", // 🟢 trims trailing runes matching a predicate; pure function, no I/O. "strings.TrimSpace", // 🟢 removes leading/trailing whitespace; pure function. + "strings.TrimSuffix", // 🟢 trims a suffix from a string; pure function, no I/O. + "sync.Map", // 🟢 concurrent-safe map for per-goroutine state; no I/O, no side effects. "syscall.ByHandleFileInformation", // 🟢 Windows file info struct for extracting nlink; read-only type, no I/O. "syscall.EACCES", // 🟢 POSIX errno constant for permission denied; pure constant, no I/O. "syscall.EISDIR", // 🟢 error number constant for "is a directory"; pure constant, no I/O. @@ -522,7 +698,10 @@ var builtinAllowedSymbols = []string{ "unicode.Cf", // 🟢 format character category range table; pure data, no I/O. "unicode.Co", // 🟢 private-use character category range table; pure data, no I/O. "unicode.Is", // 🟢 checks if rune belongs to a range table; pure function, no I/O. + "unicode.IsDigit", // 🟢 checks if a rune is a digit; pure function, no I/O. "unicode.IsGraphic", // 🟢 reports whether rune is defined as a graphic character; pure function, no I/O. + "unicode.IsLetter", // 🟢 checks if a rune is a letter; pure function, no I/O. + "unicode.MaxRune", // 🟢 maximum valid Unicode code point constant; pure constant. "unicode.Me", // 🟢 enclosing mark category range table; pure data, no I/O. "unicode.Mn", // 🟢 nonspacing mark category range table; pure data, no I/O. "unicode.Range16", // 🟢 struct type for 16-bit Unicode ranges; pure data. 
@@ -531,8 +710,11 @@ var builtinAllowedSymbols = []string{ "unicode.Zs", // 🟢 Unicode space separator category range table; pure data, no I/O. "unicode/utf8.DecodeRune", // 🟢 decodes first UTF-8 rune from a byte slice; pure function, no I/O. "unicode/utf8.DecodeRuneInString", // 🟢 decodes first UTF-8 rune from a string; pure function, no I/O. + "unicode/utf8.RuneCountInString", // 🟢 counts runes in a string; pure function, no I/O. "unicode/utf8.RuneError", // 🟢 replacement character returned for invalid UTF-8; constant, no I/O. + "unicode/utf8.RuneLen", // 🟢 returns bytes required to encode a rune; pure function, no I/O. "unicode/utf8.UTFMax", // 🟢 maximum number of bytes in a UTF-8 encoding; constant, no I/O. "unicode/utf8.Valid", // 🟢 checks if a byte slice is valid UTF-8; pure function, no I/O. + "unicode/utf8.ValidString", // 🟢 checks if a string is valid UTF-8; pure function, no I/O. } diff --git a/builtins/python/ast.go b/builtins/python/ast.go new file mode 100644 index 00000000..a7aab93e --- /dev/null +++ b/builtins/python/ast.go @@ -0,0 +1,632 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package python + +// Pos represents a source position. +type Pos struct { + Line int + Col int +} + +// Node is the base interface for all AST nodes. +type Node interface { + nodePos() Pos +} + +// Stmt is an alias for Node representing a statement. +type Stmt = Node + +// Expr is an alias for Node representing an expression. +type Expr = Node + +// ---- Statements ---- + +// Module is the top-level AST node. +type Module struct { + Pos + Body []Stmt +} + +func (n *Module) nodePos() Pos { return n.Pos } + +// AssignStmt handles assignment: a = b = c and tuple unpacking. 
type AssignStmt struct {
	Pos
	Targets []Expr // assignment targets; presumably one entry per chained target (a = b = c) — confirm against the parser
	Value   Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *AssignStmt) nodePos() Pos { return n.Pos }

// AugAssignStmt handles augmented assignment: a += b.
type AugAssignStmt struct {
	Pos
	Target Expr
	Op     string // operator text; exact spelling (e.g. "+=" vs "+") is decided by the parser — TODO confirm
	Value  Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *AugAssignStmt) nodePos() Pos { return n.Pos }

// AnnAssignStmt handles annotated assignment: x: int = 5.
type AnnAssignStmt struct {
	Pos
	Target     Expr
	Annotation Expr
	Value      Expr // may be nil
}

// nodePos returns the embedded source position, satisfying Node.
func (n *AnnAssignStmt) nodePos() Pos { return n.Pos }

// ExprStmt wraps a bare expression used as a statement.
type ExprStmt struct {
	Pos
	Value Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *ExprStmt) nodePos() Pos { return n.Pos }

// IfStmt represents an if/elif/else construct.
type IfStmt struct {
	Pos
	Test   Expr
	Body   []Stmt
	Orelse []Stmt
}

// nodePos returns the embedded source position, satisfying Node.
func (n *IfStmt) nodePos() Pos { return n.Pos }

// WhileStmt represents a while loop.
type WhileStmt struct {
	Pos
	Test   Expr
	Body   []Stmt
	Orelse []Stmt
}

// nodePos returns the embedded source position, satisfying Node.
func (n *WhileStmt) nodePos() Pos { return n.Pos }

// ForStmt represents a for loop.
type ForStmt struct {
	Pos
	Target Expr
	Iter   Expr
	Body   []Stmt
	Orelse []Stmt
}

// nodePos returns the embedded source position, satisfying Node.
func (n *ForStmt) nodePos() Pos { return n.Pos }

// FuncDef represents a function definition.
// IsGen is true if the body contains a yield expression.
type FuncDef struct {
	Pos
	Name       string
	Args       *Arguments
	Body       []Stmt
	Decorators []Expr
	IsGen      bool
}

// nodePos returns the embedded source position, satisfying Node.
func (n *FuncDef) nodePos() Pos { return n.Pos }

// ClassDef represents a class definition.
type ClassDef struct {
	Pos
	Name       string
	Bases      []Expr
	Body       []Stmt
	Decorators []Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *ClassDef) nodePos() Pos { return n.Pos }

// ReturnStmt represents a return statement. Value may be nil.
type ReturnStmt struct {
	Pos
	Value Expr // may be nil
}

// nodePos returns the embedded source position, satisfying Node.
func (n *ReturnStmt) nodePos() Pos { return n.Pos }

// BreakStmt represents a break statement.
type BreakStmt struct {
	Pos
}

// nodePos returns the embedded source position, satisfying Node.
func (n *BreakStmt) nodePos() Pos { return n.Pos }

// ContinueStmt represents a continue statement.
type ContinueStmt struct {
	Pos
}

// nodePos returns the embedded source position, satisfying Node.
func (n *ContinueStmt) nodePos() Pos { return n.Pos }

// PassStmt represents a pass statement.
type PassStmt struct {
	Pos
}

// nodePos returns the embedded source position, satisfying Node.
func (n *PassStmt) nodePos() Pos { return n.Pos }

// RaiseStmt represents a raise statement. Both Exc and Cause may be nil.
type RaiseStmt struct {
	Pos
	Exc   Expr // may be nil
	Cause Expr // may be nil
}

// nodePos returns the embedded source position, satisfying Node.
func (n *RaiseStmt) nodePos() Pos { return n.Pos }

// TryStmt represents a try/except/else/finally construct.
type TryStmt struct {
	Pos
	Body     []Stmt
	Handlers []*ExceptHandler
	Orelse   []Stmt
	Finally  []Stmt
}

// nodePos returns the embedded source position, satisfying Node.
func (n *TryStmt) nodePos() Pos { return n.Pos }

// WithStmt represents a with statement.
type WithStmt struct {
	Pos
	Items []*WithItem
	Body  []Stmt
}

// nodePos returns the embedded source position, satisfying Node.
func (n *WithStmt) nodePos() Pos { return n.Pos }

// ImportStmt represents an import statement: import X, import X as Y.
type ImportStmt struct {
	Pos
	Names []ImportName
}

// nodePos returns the embedded source position, satisfying Node.
func (n *ImportStmt) nodePos() Pos { return n.Pos }

// ImportFromStmt represents a from X import a, b statement.
// Names[0].Name == "*" for star import.
type ImportFromStmt struct {
	Pos
	Module string
	Names  []ImportName
}

// nodePos returns the embedded source position, satisfying Node.
func (n *ImportFromStmt) nodePos() Pos { return n.Pos }

// GlobalStmt represents a global declaration.
type GlobalStmt struct {
	Pos
	Names []string
}

// nodePos returns the embedded source position, satisfying Node.
func (n *GlobalStmt) nodePos() Pos { return n.Pos }

// NonlocalStmt represents a nonlocal declaration.
type NonlocalStmt struct {
	Pos
	Names []string
}

// nodePos returns the embedded source position, satisfying Node.
func (n *NonlocalStmt) nodePos() Pos { return n.Pos }

// DelStmt represents a del statement.
type DelStmt struct {
	Pos
	Targets []Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *DelStmt) nodePos() Pos { return n.Pos }

// AssertStmt represents an assert statement. Msg may be nil.
type AssertStmt struct {
	Pos
	Test Expr
	Msg  Expr // may be nil
}

// nodePos returns the embedded source position, satisfying Node.
func (n *AssertStmt) nodePos() Pos { return n.Pos }

// ---- Helper types ----
//
// The helper types below declare no nodePos method in this section, so they
// are carried inside other nodes rather than used as Node values themselves.

// ExceptHandler is a single except clause. Type may be nil for bare except.
type ExceptHandler struct {
	Pos
	Type Expr   // may be nil
	Name string // may be ""
	Body []Stmt
}

// WithItem is a single item in a with statement.
type WithItem struct {
	CtxExpr Expr
	OptVar  Expr // may be nil
}

// ImportName holds a single import name and its optional alias.
type ImportName struct {
	Name  string
	Alias string // may be ""
}

// Arguments describes the argument specification of a function.
type Arguments struct {
	Args       []string
	Defaults   []Expr // NOTE(review): alignment with Args (trailing vs. per-argument) is not visible here — confirm against the parser
	Vararg     string // "" if no *args
	Kwarg      string // "" if no **kwargs
	KwOnly     []string
	KwDefaults []Expr
}

// ---- Expressions ----

// BinOp represents a binary operation.
type BinOp struct {
	Pos
	Left  Expr
	Right Expr
	Op    string
}

// nodePos returns the embedded source position, satisfying Node.
func (n *BinOp) nodePos() Pos { return n.Pos }

// UnaryOp represents a unary operation. Op: "-", "+", "~", "not".
type UnaryOp struct {
	Pos
	Operand Expr
	Op      string
}

// nodePos returns the embedded source position, satisfying Node.
func (n *UnaryOp) nodePos() Pos { return n.Pos }

// BoolOp represents a boolean operation. Op: "and" or "or".
type BoolOp struct {
	Pos
	Op     string
	Values []Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *BoolOp) nodePos() Pos { return n.Pos }

// Compare represents a chained comparison: a < b <= c.
type Compare struct {
	Pos
	Left        Expr
	Ops         []string
	Comparators []Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *Compare) nodePos() Pos { return n.Pos }

// CallExpr represents a function call.
type CallExpr struct {
	Pos
	Func     Expr
	Args     []Expr
	Keywords []*Keyword
	Starargs []Expr // presumably the *iterable arguments — confirm against the parser
	Kwargs   []Expr // presumably the **mapping arguments — confirm against the parser
}

// nodePos returns the embedded source position, satisfying Node.
func (n *CallExpr) nodePos() Pos { return n.Pos }

// AttributeExpr represents attribute access: value.attr.
type AttributeExpr struct {
	Pos
	Value Expr
	Attr  string
}

// nodePos returns the embedded source position, satisfying Node.
func (n *AttributeExpr) nodePos() Pos { return n.Pos }

// SubscriptExpr represents subscript access: value[slice].
type SubscriptExpr struct {
	Pos
	Value Expr
	Slice Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *SubscriptExpr) nodePos() Pos { return n.Pos }

// SliceExpr represents a slice: lower:upper:step. Any field may be nil.
type SliceExpr struct {
	Pos
	Lower Expr // may be nil
	Upper Expr // may be nil
	Step  Expr // may be nil
}

// nodePos returns the embedded source position, satisfying Node.
func (n *SliceExpr) nodePos() Pos { return n.Pos }

// NameExpr represents a name reference.
type NameExpr struct {
	Pos
	Id string
}

// nodePos returns the embedded source position, satisfying Node.
func (n *NameExpr) nodePos() Pos { return n.Pos }

// Constant represents a literal value.
// Value holds: int64, float64, string, []byte, bool, or nil.
type Constant struct {
	Pos
	Value interface{}
}

// nodePos returns the embedded source position, satisfying Node.
func (n *Constant) nodePos() Pos { return n.Pos }

// ListExpr represents a list literal.
type ListExpr struct {
	Pos
	Elts []Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *ListExpr) nodePos() Pos { return n.Pos }

// TupleExpr represents a tuple literal.
type TupleExpr struct {
	Pos
	Elts []Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *TupleExpr) nodePos() Pos { return n.Pos }

// DictExpr represents a dict literal. Key==nil means **unpack.
type DictExpr struct {
	Pos
	Keys   []Expr // a nil entry marks a **unpack whose operand is the matching Values entry
	Values []Expr // indexed in parallel with Keys
}

// nodePos returns the embedded source position, satisfying Node.
func (n *DictExpr) nodePos() Pos { return n.Pos }

// SetExpr represents a set literal.
type SetExpr struct {
	Pos
	Elts []Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *SetExpr) nodePos() Pos { return n.Pos }

// IfExp represents a ternary expression: body if test else orelse.
type IfExp struct {
	Pos
	Test   Expr
	Body   Expr
	Orelse Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *IfExp) nodePos() Pos { return n.Pos }

// Lambda represents a lambda expression.
type Lambda struct {
	Pos
	Args *Arguments
	Body Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *Lambda) nodePos() Pos { return n.Pos }

// ListComp represents a list comprehension.
type ListComp struct {
	Pos
	Elt        Expr
	Generators []*Comprehension
}

// nodePos returns the embedded source position, satisfying Node.
func (n *ListComp) nodePos() Pos { return n.Pos }

// DictComp represents a dict comprehension.
type DictComp struct {
	Pos
	Key        Expr
	Value      Expr
	Generators []*Comprehension
}

// nodePos returns the embedded source position, satisfying Node.
func (n *DictComp) nodePos() Pos { return n.Pos }

// SetComp represents a set comprehension.
type SetComp struct {
	Pos
	Elt        Expr
	Generators []*Comprehension
}

// nodePos returns the embedded source position, satisfying Node.
func (n *SetComp) nodePos() Pos { return n.Pos }

// GeneratorExp represents a generator expression.
type GeneratorExp struct {
	Pos
	Elt        Expr
	Generators []*Comprehension
}

// nodePos returns the embedded source position, satisfying Node.
func (n *GeneratorExp) nodePos() Pos { return n.Pos }

// Yield represents a yield expression. Value may be nil.
type Yield struct {
	Pos
	Value Expr // may be nil
}

// nodePos returns the embedded source position, satisfying Node.
func (n *Yield) nodePos() Pos { return n.Pos }

// YieldFrom represents a yield from expression.
type YieldFrom struct {
	Pos
	Value Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *YieldFrom) nodePos() Pos { return n.Pos }

// Starred represents a starred expression: *x.
type Starred struct {
	Pos
	Value Expr
}

// nodePos returns the embedded source position, satisfying Node.
func (n *Starred) nodePos() Pos { return n.Pos }

// Comprehension represents a single for clause in a comprehension.
// It declares no nodePos method in this section, so it is not itself a Node.
type Comprehension struct {
	Target Expr
	Iter   Expr
	Ifs    []Expr
}

// Keyword represents a keyword argument. Arg=="" means **unpack.
// It declares no nodePos method in this section, so it is not itself a Node.
type Keyword struct {
	Arg   string // "" for **unpack
	Value Expr
}

// containsYield walks stmts recursively looking for Yield or YieldFrom nodes.
// It does NOT recurse into nested FuncDef or Lambda bodies.
+func containsYield(stmts []Stmt) bool { + for _, s := range stmts { + if yieldInStmt(s) { + return true + } + } + return false +} + +func yieldInStmt(s Stmt) bool { + switch n := s.(type) { + case *ExprStmt: + return yieldInExpr(n.Value) + case *AssignStmt: + if yieldInExpr(n.Value) { + return true + } + for _, t := range n.Targets { + if yieldInExpr(t) { + return true + } + } + case *AugAssignStmt: + return yieldInExpr(n.Value) + case *AnnAssignStmt: + return yieldInExpr(n.Value) + case *ReturnStmt: + return yieldInExpr(n.Value) + case *IfStmt: + return yieldInExpr(n.Test) || containsYield(n.Body) || containsYield(n.Orelse) + case *WhileStmt: + return yieldInExpr(n.Test) || containsYield(n.Body) || containsYield(n.Orelse) + case *ForStmt: + return yieldInExpr(n.Iter) || containsYield(n.Body) || containsYield(n.Orelse) + case *TryStmt: + if containsYield(n.Body) || containsYield(n.Orelse) || containsYield(n.Finally) { + return true + } + for _, h := range n.Handlers { + if containsYield(h.Body) { + return true + } + } + case *WithStmt: + return containsYield(n.Body) + case *RaiseStmt: + return yieldInExpr(n.Exc) || yieldInExpr(n.Cause) + case *DelStmt: + for _, t := range n.Targets { + if yieldInExpr(t) { + return true + } + } + case *AssertStmt: + return yieldInExpr(n.Test) || yieldInExpr(n.Msg) + // FuncDef and ClassDef: do NOT recurse into nested function bodies + } + return false +} + +func yieldInExpr(e Expr) bool { + if e == nil { + return false + } + switch n := e.(type) { + case *Yield: + return true + case *YieldFrom: + return true + case *BinOp: + return yieldInExpr(n.Left) || yieldInExpr(n.Right) + case *UnaryOp: + return yieldInExpr(n.Operand) + case *BoolOp: + for _, v := range n.Values { + if yieldInExpr(v) { + return true + } + } + case *Compare: + if yieldInExpr(n.Left) { + return true + } + for _, c := range n.Comparators { + if yieldInExpr(c) { + return true + } + } + case *CallExpr: + if yieldInExpr(n.Func) { + return true + } + for _, a := 
range n.Args { + if yieldInExpr(a) { + return true + } + } + for _, kw := range n.Keywords { + if yieldInExpr(kw.Value) { + return true + } + } + case *AttributeExpr: + return yieldInExpr(n.Value) + case *SubscriptExpr: + return yieldInExpr(n.Value) || yieldInExpr(n.Slice) + case *SliceExpr: + return yieldInExpr(n.Lower) || yieldInExpr(n.Upper) || yieldInExpr(n.Step) + case *ListExpr: + for _, elt := range n.Elts { + if yieldInExpr(elt) { + return true + } + } + case *TupleExpr: + for _, elt := range n.Elts { + if yieldInExpr(elt) { + return true + } + } + case *DictExpr: + for i, k := range n.Keys { + if yieldInExpr(k) || yieldInExpr(n.Values[i]) { + return true + } + } + case *SetExpr: + for _, elt := range n.Elts { + if yieldInExpr(elt) { + return true + } + } + case *IfExp: + return yieldInExpr(n.Test) || yieldInExpr(n.Body) || yieldInExpr(n.Orelse) + case *Starred: + return yieldInExpr(n.Value) + // Lambda: do NOT recurse into lambda body + } + return false +} diff --git a/builtins/python/builtins_funcs.go b/builtins/python/builtins_funcs.go new file mode 100644 index 00000000..79253b20 --- /dev/null +++ b/builtins/python/builtins_funcs.go @@ -0,0 +1,1854 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package python + +import ( + "fmt" + "io" + "math" + "math/big" + "os" + "strconv" + "strings" + "unicode/utf8" +) + +// makeBuiltins returns the dict of all Python built-in functions and constants. 
+func makeBuiltins(opts *RunOpts) map[string]Object { + b := map[string]Object{ + // Constants + "True": pyTrue, + "False": pyFalse, + "None": pyNone, + + // Exception classes + "BaseException": ExcBaseException, + "Exception": ExcException, + "ArithmeticError": ExcArithmeticError, + "LookupError": ExcLookupError, + "ValueError": ExcValueError, + "TypeError": ExcTypeError, + "AttributeError": ExcAttributeError, + "NameError": ExcNameError, + "ImportError": ExcImportError, + "IndexError": ExcIndexError, + "KeyError": ExcKeyError, + "StopIteration": ExcStopIteration, + "GeneratorExit": ExcGeneratorExit, + "RuntimeError": ExcRuntimeError, + "NotImplementedError": ExcNotImplementedError, + "OSError": ExcOSError, + "IOError": ExcIOError, + "FileNotFoundError": ExcFileNotFoundError, + "PermissionError": ExcPermissionError, + "ZeroDivisionError": ExcZeroDivisionError, + "OverflowError": ExcOverflowError, + "MemoryError": ExcMemoryError, + "KeyboardInterrupt": ExcKeyboardInterrupt, + "SystemExit": ExcSystemExit, + "AssertionError": ExcAssertionError, + "UnboundLocalError": ExcUnboundLocalError, + "RecursionError": ExcRecursionError, + "UnicodeError": ExcUnicodeError, + "UnicodeDecodeError": ExcUnicodeDecodeError, + "UnicodeEncodeError": ExcUnicodeEncodeError, + + // Special singletons + "NotImplemented": &PyNotImplemented{}, + "Ellipsis": &PyEllipsis{}, + + // Built-in functions + "print": makeBuiltinPrint(opts), + "len": makeBuiltinLen(), + "range": makeBuiltinRange(), + "zip": makeBuiltinZip(), + "map": makeBuiltinMap(), + "filter": makeBuiltinFilter(), + "enumerate": makeBuiltinEnumerate(), + "sorted": makeBuiltinSorted(), + "reversed": makeBuiltinReversed(), + "all": makeBuiltinAll(), + "any": makeBuiltinAny(), + "sum": makeBuiltinSum(), + "min": makeBuiltinMin(), + "max": makeBuiltinMax(), + "abs": makeBuiltinAbs(), + "divmod": makeBuiltinDivmod(), + "pow": makeBuiltinPow(), + "round": makeBuiltinRound(), + "chr": makeBuiltinChr(), + "ord": makeBuiltinOrd(), + "bin": 
makeBuiltinBin(), + "hex": makeBuiltinHex(), + "oct": makeBuiltinOct(), + "getattr": makeBuiltinGetattr(), + "setattr": makeBuiltinSetattr(), + "hasattr": makeBuiltinHasattr(), + "delattr": makeBuiltinDelattr(), + "isinstance": makeBuiltinIsinstance(), + "issubclass": makeBuiltinIssubclass(), + "type": makeBuiltinType(), + "int": makeBuiltinInt(), + "str": makeBuiltinStr(), + "float": makeBuiltinFloat(), + "bool": makeBuiltinBool(), + "list": makeBuiltinList(), + "dict": makeBuiltinDict(), + "tuple": makeBuiltinTuple(), + "set": makeBuiltinSet(), + "frozenset": makeBuiltinFrozenset(), + "repr": makeBuiltinRepr(), + "hash": makeBuiltinHash(), + "id": makeBuiltinId(), + "callable": makeBuiltinCallable(), + "next": makeBuiltinNext(), + "iter": makeBuiltinIter(), + "input": makeBuiltinInput(opts), + "vars": makeBuiltinVars(), + "dir": makeBuiltinDir(), + "format": makeBuiltinFormat(), + "bytes": makeBuiltinBytes(), + "bytearray": makeBuiltinBytearray(), + "memoryview": makeBuiltinMemoryview(), + "open": makeBuiltinOpen(opts), + "super": makeBuiltinSuper(), + "object": makeBuiltinObject(), + "staticmethod": makeBuiltin("staticmethod", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("staticmethod() takes exactly 1 argument") + } + return args[0] + }), + "classmethod": makeBuiltin("classmethod", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("classmethod() takes exactly 1 argument") + } + return args[0] + }), + "property": makeBuiltin("property", func(args []Object, kwargs map[string]Object) Object { + // Simplified: just return the getter + if len(args) > 0 { + return args[0] + } + return pyNone + }), + } + return b +} + +// ---- Singleton types ---- + +type PyNotImplemented struct{} + +func (p *PyNotImplemented) pyType() *PyType { return typeBuiltin } +func (p *PyNotImplemented) pyRepr() string { return "NotImplemented" } +func (p *PyNotImplemented) pyStr() string { return 
"NotImplemented" } + +type PyEllipsis struct{} + +func (p *PyEllipsis) pyType() *PyType { return typeBuiltin } +func (p *PyEllipsis) pyRepr() string { return "Ellipsis" } +func (p *PyEllipsis) pyStr() string { return "..." } + +// ---- Built-in function implementations ---- + +func makeBuiltinPrint(opts *RunOpts) *PyBuiltin { + return makeBuiltin("print", func(args []Object, kwargs map[string]Object) Object { + sep := " " + end := "\n" + var out io.Writer = opts.Stdout + + if v, ok := kwargs["sep"]; ok { + if v == pyNone { + sep = " " + } else if s, ok2 := v.(*PyStr); ok2 { + sep = s.v + } + } + if v, ok := kwargs["end"]; ok { + if v == pyNone { + end = "\n" + } else if s, ok2 := v.(*PyStr); ok2 { + end = s.v + } + } + if v, ok := kwargs["file"]; ok && v != pyNone { + if f, ok2 := v.(*PyFile); ok2 { + if f.w != nil { + out = f.w + } else if f.rc != nil { + // Files opened via open() are read-only; block writes at the + // application layer for consistency with file.write(). + panic(exceptionSignal{exc: newExceptionf(ExcPermissionError, "print() cannot write to a file opened in read mode")}) + } + } + } + + parts := make([]string, len(args)) + for i, arg := range args { + parts[i] = arg.pyStr() + } + fmt.Fprint(out, strings.Join(parts, sep)+end) + return pyNone + }) +} + +func makeBuiltinLen() *PyBuiltin { + return makeBuiltin("len", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("len() takes exactly 1 argument (%d given)", len(args)) + } + obj := args[0] + switch v := obj.(type) { + case *PyStr: + return pyInt(int64(utf8.RuneCountInString(v.v))) + case *PyBytes: + return pyInt(int64(len(v.v))) + case *PyList: + return pyInt(int64(len(v.items))) + case *PyTuple: + return pyInt(int64(len(v.items))) + case *PyDict: + return pyInt(int64(len(v.keys))) + case *PySet: + return pyInt(int64(len(v.items))) + case *PyFrozenSet: + return pyInt(int64(len(v.items))) + case *PyRange: + return pyInt(v.length()) + case *PyInstance: + 
if fn, ok := v.lookupMethod("__len__"); ok { + result := callObject(fn, []Object{v}, nil) + return result + } + } + raiseTypeError("object of type '%s' has no len()", obj.pyType().Name) + return nil + }) +} + +func makeBuiltinRange() *PyBuiltin { + return makeBuiltin("range", func(args []Object, kwargs map[string]Object) Object { + switch len(args) { + case 1: + stop := toIntVal(args[0]) + return &PyRange{start: 0, stop: stop, step: 1} + case 2: + start := toIntVal(args[0]) + stop := toIntVal(args[1]) + return &PyRange{start: start, stop: stop, step: 1} + case 3: + start := toIntVal(args[0]) + stop := toIntVal(args[1]) + step := toIntVal(args[2]) + if step == 0 { + raiseValueError("range() arg 3 must not be zero") + } + return &PyRange{start: start, stop: stop, step: step} + default: + raiseTypeError("range() takes 1 to 3 arguments (%d given)", len(args)) + } + return nil + }) +} + +func makeBuiltinZip() *PyBuiltin { + return makeBuiltin("zip", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 { + return &PyZipIter{items: [][]Object{}} + } + collected := make([][]Object, len(args)) + for i, arg := range args { + collected[i] = collectIterable(arg) + } + return &PyZipIter{items: collected} + }) +} + +func makeBuiltinMap() *PyBuiltin { + return makeBuiltin("map", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 2 { + raiseTypeError("map() requires at least 2 arguments") + } + fn := args[0] + collected := make([][]Object, len(args)-1) + for i, arg := range args[1:] { + collected[i] = collectIterable(arg) + } + return &PyMapIter{fn: fn, items: collected} + }) +} + +func makeBuiltinFilter() *PyBuiltin { + return makeBuiltin("filter", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 2 { + raiseTypeError("filter() takes exactly 2 arguments (%d given)", len(args)) + } + fn := args[0] + items := collectIterable(args[1]) + return &PyFilterIter{fn: fn, items: items} + }) +} + +func makeBuiltinEnumerate() 
*PyBuiltin { + return makeBuiltin("enumerate", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("enumerate() requires at least 1 argument") + } + start := int64(0) + if len(args) > 1 { + start = toIntVal(args[1]) + } + if v, ok := kwargs["start"]; ok { + start = toIntVal(v) + } + items := collectIterable(args[0]) + return &PyEnumerateIter{items: items, counter: start} + }) +} + +func makeBuiltinSorted() *PyBuiltin { + return makeBuiltin("sorted", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("sorted() requires at least 1 argument") + } + items := collectIterable(args[0]) + result := make([]Object, len(items)) + copy(result, items) + reverse := false + var keyFn Object + if v, ok := kwargs["reverse"]; ok { + reverse = pyTruth(v) + } + if v, ok := kwargs["key"]; ok && v != pyNone { + keyFn = v + } + sortList(result, keyFn, reverse) + return pyList(result) + }) +} + +func makeBuiltinReversed() *PyBuiltin { + return makeBuiltin("reversed", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("reversed() takes exactly 1 argument") + } + items := collectIterable(args[0]) + return &PyReversedIter{items: items, idx: len(items) - 1} + }) +} + +func makeBuiltinAll() *PyBuiltin { + return makeBuiltin("all", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("all() takes exactly 1 argument") + } + items := collectIterable(args[0]) + for _, item := range items { + if !pyTruth(item) { + return pyFalse + } + } + return pyTrue + }) +} + +func makeBuiltinAny() *PyBuiltin { + return makeBuiltin("any", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("any() takes exactly 1 argument") + } + items := collectIterable(args[0]) + for _, item := range items { + if pyTruth(item) { + return pyTrue + } + } + return pyFalse + }) +} + +func makeBuiltinSum() *PyBuiltin { + return 
makeBuiltin("sum", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("sum() requires at least 1 argument") + } + start := Object(pyInt(0)) + if len(args) > 1 { + start = args[1] + } + items := collectIterable(args[0]) + result := start + for _, item := range items { + result = pyAdd(result, item) + } + return result + }) +} + +func makeBuiltinMin() *PyBuiltin { + return makeBuiltin("min", func(args []Object, kwargs map[string]Object) Object { + var keyFn Object + if v, ok := kwargs["key"]; ok && v != pyNone { + keyFn = v + } + var items []Object + if len(args) == 1 { + items = collectIterable(args[0]) + } else { + items = args + } + if len(items) == 0 { + raiseValueError("min() arg is an empty sequence") + } + best := items[0] + bestKey := applyKey(best, keyFn) + for _, item := range items[1:] { + k := applyKey(item, keyFn) + if pyCompare(k, bestKey) < 0 { + best = item + bestKey = k + } + } + return best + }) +} + +func makeBuiltinMax() *PyBuiltin { + return makeBuiltin("max", func(args []Object, kwargs map[string]Object) Object { + var keyFn Object + if v, ok := kwargs["key"]; ok && v != pyNone { + keyFn = v + } + var items []Object + if len(args) == 1 { + items = collectIterable(args[0]) + } else { + items = args + } + if len(items) == 0 { + raiseValueError("max() arg is an empty sequence") + } + best := items[0] + bestKey := applyKey(best, keyFn) + for _, item := range items[1:] { + k := applyKey(item, keyFn) + if pyCompare(k, bestKey) > 0 { + best = item + bestKey = k + } + } + return best + }) +} + +func applyKey(item Object, keyFn Object) Object { + if keyFn == nil || keyFn == pyNone { + return item + } + return callObject(keyFn, []Object{item}, nil) +} + +func makeBuiltinAbs() *PyBuiltin { + return makeBuiltin("abs", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("abs() takes exactly 1 argument") + } + switch v := args[0].(type) { + case *PyInt: + if v.big != nil { + b 
:= new(big.Int).Abs(v.big) + return pyIntBig(b) + } + if v.small < 0 { + return pyInt(-v.small) + } + return v + case *PyFloat: + return pyFloat(math.Abs(v.v)) + case *PyBool: + if !v.v { + return pyInt(0) + } + return pyInt(1) + } + raiseTypeError("bad operand type for abs(): '%s'", args[0].pyType().Name) + return nil + }) +} + +func makeBuiltinDivmod() *PyBuiltin { + return makeBuiltin("divmod", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 2 { + raiseTypeError("divmod() takes exactly 2 arguments") + } + a, b := args[0], args[1] + switch av := a.(type) { + case *PyInt: + if bv, ok := b.(*PyInt); ok { + // Use big.Int arithmetic to handle values outside int64 range. + ab := av.toBigInt() + bb := bv.toBigInt() + if bb.Sign() == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcZeroDivisionError, "integer division or modulo by zero")}) + } + q := new(big.Int) + r := new(big.Int) + q.DivMod(ab, bb, r) + // Python-style modulo: result has same sign as divisor + if r.Sign() != 0 && r.Sign() != bb.Sign() { + r.Add(r, bb) + q.Sub(q, big.NewInt(1)) + } + return pyTuple([]Object{pyIntBig(q), pyIntBig(r)}) + } + case *PyFloat: + var bv float64 + switch bval := b.(type) { + case *PyFloat: + bv = bval.v + case *PyInt: + if n, ok := bval.int64(); ok { + bv = float64(n) + } + } + if bv == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcZeroDivisionError, "float divmod()")}) + } + q := math.Floor(av.v / bv) + r := av.v - q*bv + return pyTuple([]Object{pyFloat(q), pyFloat(r)}) + } + raiseTypeError("unsupported operand type(s) for divmod()") + return nil + }) +} + +func makeBuiltinPow() *PyBuiltin { + return makeBuiltin("pow", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 2 || len(args) > 3 { + raiseTypeError("pow() takes 2 or 3 arguments (%d given)", len(args)) + } + base, exp := args[0], args[1] + if len(args) == 3 { + // Modular exponentiation + mod := args[2] + bi := toIntValObj(base) + ei := toIntValObj(exp) + mi := 
toIntValObj(mod) + result := new(big.Int).Exp(bi, ei, mi) + return pyIntBig(result) + } + // Regular pow + switch bv := base.(type) { + case *PyInt: + switch ev := exp.(type) { + case *PyInt: + en, eok := ev.int64() + if eok && en >= 0 { + // Guard against exponents that would produce astronomically large results. + // Analogous to the maxShift cap on <<. Allow up to ~8 Mbit of result. + const maxExpBits = 8 * maxRepeatBytes + baseBits := int64(bv.toBigInt().BitLen()) + if baseBits > 1 && en > maxExpBits/baseBits { + panic(exceptionSignal{exc: newExceptionf(ExcOverflowError, "integer exponentiation result too large")}) + } + bi := bv.toBigInt() + ei := ev.toBigInt() + result := new(big.Int).Exp(bi, ei, nil) + return pyIntBig(result) + } + // Negative exponent → float + bn, _ := bv.int64() + en2, _ := ev.int64() + return pyFloat(math.Pow(float64(bn), float64(en2))) + case *PyFloat: + bn, _ := bv.int64() + return pyFloat(math.Pow(float64(bn), ev.v)) + } + case *PyFloat: + var ef float64 + switch ev := exp.(type) { + case *PyFloat: + ef = ev.v + case *PyInt: + if n, ok := ev.int64(); ok { + ef = float64(n) + } + } + return pyFloat(math.Pow(bv.v, ef)) + } + raiseTypeError("unsupported operand type(s) for pow()") + return nil + }) +} + +func toIntValObj(obj Object) *big.Int { + switch v := obj.(type) { + case *PyInt: + return v.toBigInt() + case *PyBool: + if v.v { + return big.NewInt(1) + } + return big.NewInt(0) + } + raiseTypeError("expected int, got %s", obj.pyType().Name) + return nil +} + +func makeBuiltinRound() *PyBuiltin { + return makeBuiltin("round", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 || len(args) > 2 { + raiseTypeError("round() takes 1 or 2 arguments (%d given)", len(args)) + } + ndigits := -1 + if len(args) == 2 && args[1] != pyNone { + ndigits = int(toIntVal(args[1])) + } + switch v := args[0].(type) { + case *PyFloat: + if ndigits < 0 { + // Round to int + return pyInt(int64(math.RoundToEven(v.v))) + } + factor := 
math.Pow10(ndigits) + return pyFloat(math.RoundToEven(v.v*factor) / factor) + case *PyInt: + if ndigits < 0 { + return v + } + return v + } + raiseTypeError("type %s doesn't define __round__ method", args[0].pyType().Name) + return nil + }) +} + +func makeBuiltinChr() *PyBuiltin { + return makeBuiltin("chr", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("chr() takes exactly 1 argument") + } + n := toIntVal(args[0]) + if n < 0 || n > 0x10FFFF { + raiseValueError("chr() arg not in range(0x110000)") + } + return pyStr(string(rune(n))) + }) +} + +func makeBuiltinOrd() *PyBuiltin { + return makeBuiltin("ord", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("ord() takes exactly 1 argument") + } + switch v := args[0].(type) { + case *PyStr: + runes := []rune(v.v) + if len(runes) != 1 { + raiseTypeError("ord() expected a character, but string of length %d found", len(runes)) + } + return pyInt(int64(runes[0])) + case *PyBytes: + if len(v.v) != 1 { + raiseTypeError("ord() expected a character, but bytes of length %d found", len(v.v)) + } + return pyInt(int64(v.v[0])) + } + raiseTypeError("ord() expected string of length 1, but %s found", args[0].pyType().Name) + return nil + }) +} + +func makeBuiltinBin() *PyBuiltin { + return makeBuiltin("bin", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("bin() takes exactly 1 argument") + } + bi := toIntValBig(args[0]) + if bi.Sign() >= 0 { + return pyStr("0b" + bi.Text(2)) + } + return pyStr("-0b" + new(big.Int).Neg(bi).Text(2)) + }) +} + +func makeBuiltinHex() *PyBuiltin { + return makeBuiltin("hex", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("hex() takes exactly 1 argument") + } + bi := toIntValBig(args[0]) + if bi.Sign() >= 0 { + return pyStr("0x" + bi.Text(16)) + } + return pyStr("-0x" + new(big.Int).Neg(bi).Text(16)) + }) +} + +func 
makeBuiltinOct() *PyBuiltin { + return makeBuiltin("oct", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("oct() takes exactly 1 argument") + } + bi := toIntValBig(args[0]) + if bi.Sign() >= 0 { + return pyStr("0o" + bi.Text(8)) + } + return pyStr("-0o" + new(big.Int).Neg(bi).Text(8)) + }) +} + +func makeBuiltinGetattr() *PyBuiltin { + return makeBuiltin("getattr", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 2 || len(args) > 3 { + raiseTypeError("getattr() takes 2 or 3 arguments") + } + obj := args[0] + name := mustStr(args[1], "getattr") + val, ok := getAttr(obj, name) + if !ok { + if len(args) == 3 { + return args[2] + } + raiseAttributeError(obj.pyType().Name, name) + } + return val + }) +} + +func makeBuiltinSetattr() *PyBuiltin { + return makeBuiltin("setattr", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 3 { + raiseTypeError("setattr() takes exactly 3 arguments") + } + obj := args[0] + name := mustStr(args[1], "setattr") + val := args[2] + setAttr(obj, name, val) + return pyNone + }) +} + +func makeBuiltinHasattr() *PyBuiltin { + return makeBuiltin("hasattr", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 2 { + raiseTypeError("hasattr() takes exactly 2 arguments") + } + obj := args[0] + name := mustStr(args[1], "hasattr") + // Try to get the attr; if it panics, return False + result := func() (found bool) { + defer func() { + if r := recover(); r != nil { + found = false + } + }() + _, found = getAttr(obj, name) + return found + }() + return pyBool(result) + }) +} + +func makeBuiltinDelattr() *PyBuiltin { + return makeBuiltin("delattr", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 2 { + raiseTypeError("delattr() takes exactly 2 arguments") + } + obj := args[0] + name := mustStr(args[1], "delattr") + if inst, ok := obj.(*PyInstance); ok { + delete(inst.Dict, name) + } else { + 
raiseAttributeError(obj.pyType().Name, name) + } + return pyNone + }) +} + +func makeBuiltinIsinstance() *PyBuiltin { + return makeBuiltin("isinstance", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 2 { + raiseTypeError("isinstance() takes exactly 2 arguments") + } + obj := args[0] + classinfo := args[1] + return pyBool(checkInstance(obj, classinfo)) + }) +} + +func checkInstance(obj Object, classinfo Object) bool { + switch cv := classinfo.(type) { + case *PyClass: + return isInstance(obj, cv) + case *PyTuple: + for _, c := range cv.items { + if checkInstance(obj, c) { + return true + } + } + return false + case *PyType: + return obj.pyType() == cv || + (cv == typeInt && (isIntLike(obj))) || + (cv == typeStr && isStrLike(obj)) || + (cv == typeBool && isBoolLike(obj)) + case *PyBuiltin: + // Handle isinstance(x, int/str/float/bool/list/dict/tuple/set/bytes) + // where the type constructors are PyBuiltin objects. + switch cv.Name { + case "int": + return isIntLike(obj) + case "str": + return isStrLike(obj) + case "float": + _, ok := obj.(*PyFloat) + return ok + case "bool": + return isBoolLike(obj) + case "list": + _, ok := obj.(*PyList) + return ok + case "dict": + _, ok := obj.(*PyDict) + return ok + case "tuple": + _, ok := obj.(*PyTuple) + return ok + case "set": + _, ok := obj.(*PySet) + return ok + case "bytes": + _, ok := obj.(*PyBytes) + return ok + } + return false + } + return false +} + +func isIntLike(obj Object) bool { + switch obj.(type) { + case *PyInt, *PyBool: + return true + } + return false +} + +func isStrLike(obj Object) bool { + _, ok := obj.(*PyStr) + return ok +} + +func isBoolLike(obj Object) bool { + _, ok := obj.(*PyBool) + return ok +} + +func makeBuiltinIssubclass() *PyBuiltin { + return makeBuiltin("issubclass", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 2 { + raiseTypeError("issubclass() takes exactly 2 arguments") + } + cls, ok := args[0].(*PyClass) + if !ok { + 
raiseTypeError("issubclass() arg 1 must be a class") + } + classinfo := args[1] + switch cv := classinfo.(type) { + case *PyClass: + for _, c := range cls.MRO { + if c == cv { + return pyTrue + } + } + return pyFalse + case *PyTuple: + for _, c := range cv.items { + if cls2, ok2 := c.(*PyClass); ok2 { + for _, mro := range cls.MRO { + if mro == cls2 { + return pyTrue + } + } + } + } + return pyFalse + } + raiseTypeError("issubclass() arg 2 must be a class or tuple of classes") + return nil + }) +} + +func makeBuiltinType() *PyBuiltin { + return makeBuiltin("type", func(args []Object, kwargs map[string]Object) Object { + switch len(args) { + case 1: + obj := args[0] + switch v := obj.(type) { + case *PyInstance: + return v.Class + case *PyException: + return v.ExcClass + default: + return obj.pyType() + } + case 3: + name := mustStr(args[0], "type") + var bases []*PyClass + if bt, ok := args[1].(*PyTuple); ok { + for _, b := range bt.items { + if bc, ok2 := b.(*PyClass); ok2 { + bases = append(bases, bc) + } + } + } + dictArg, ok := args[2].(*PyDict) + if !ok { + raiseTypeError("type() arg 3 must be a dict") + } + cls := &PyClass{ + Name: name, + Bases: bases, + Dict: make(map[string]Object), + } + for i, k := range dictArg.keys { + if ks, ok2 := k.(*PyStr); ok2 { + cls.Dict[ks.v] = dictArg.vals[i] + } + } + cls.MRO = computeMRO(cls) + return cls + default: + raiseTypeError("type() takes 1 or 3 arguments (%d given)", len(args)) + } + return nil + }) +} + +func makeBuiltinInt() *PyBuiltin { + return makeBuiltin("int", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 { + return pyInt(0) + } + obj := args[0] + base := 10 + if len(args) > 1 { + base = int(toIntVal(args[1])) + } + if v, ok := kwargs["base"]; ok { + base = int(toIntVal(v)) + } + switch v := obj.(type) { + case *PyInt: + return v + case *PyBool: + if v.v { + return pyInt(1) + } + return pyInt(0) + case *PyFloat: + return pyInt(int64(v.v)) + case *PyStr: + s := 
strings.TrimSpace(v.v) + // Handle prefix for auto-base detection + if base == 0 { + if strings.HasPrefix(s, "0x") || strings.HasPrefix(s, "0X") { + base = 16 + s = s[2:] + } else if strings.HasPrefix(s, "0o") || strings.HasPrefix(s, "0O") { + base = 8 + s = s[2:] + } else if strings.HasPrefix(s, "0b") || strings.HasPrefix(s, "0B") { + base = 2 + s = s[2:] + } else { + base = 10 + } + } else if base == 16 && (strings.HasPrefix(s, "0x") || strings.HasPrefix(s, "0X")) { + s = s[2:] + } else if base == 8 && (strings.HasPrefix(s, "0o") || strings.HasPrefix(s, "0O")) { + s = s[2:] + } else if base == 2 && (strings.HasPrefix(s, "0b") || strings.HasPrefix(s, "0B")) { + s = s[2:] + } + n, err := strconv.ParseInt(s, base, 64) + if err != nil { + // Try big int + bi := new(big.Int) + _, ok2 := bi.SetString(s, base) + if !ok2 { + raiseValueError("invalid literal for int() with base %d: %s", base, v.pyRepr()) + } + return pyIntBig(bi) + } + return pyInt(n) + } + raiseTypeError("int() argument must be a string, a bytes-like object or a number, not '%s'", obj.pyType().Name) + return nil + }) +} + +func makeBuiltinStr() *PyBuiltin { + return makeBuiltin("str", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 { + return pyStr("") + } + return pyStr(args[0].pyStr()) + }) +} + +func makeBuiltinFloat() *PyBuiltin { + return makeBuiltin("float", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 { + return pyFloat(0) + } + obj := args[0] + switch v := obj.(type) { + case *PyFloat: + return v + case *PyInt: + if n, ok := v.int64(); ok { + return pyFloat(float64(n)) + } + f, _ := new(big.Float).SetInt(v.big).Float64() + return pyFloat(f) + case *PyBool: + if v.v { + return pyFloat(1) + } + return pyFloat(0) + case *PyStr: + s := strings.TrimSpace(v.v) + switch strings.ToLower(s) { + case "inf", "+inf", "infinity", "+infinity": + return pyFloat(math.Inf(1)) + case "-inf", "-infinity": + return pyFloat(math.Inf(-1)) + case "nan": + return 
pyFloat(math.NaN()) + } + f, err := strconv.ParseFloat(s, 64) + if err != nil { + raiseValueError("could not convert string to float: %s", v.pyRepr()) + } + return pyFloat(f) + } + raiseTypeError("float() argument must be a string or a number, not '%s'", obj.pyType().Name) + return nil + }) +} + +func makeBuiltinBool() *PyBuiltin { + return makeBuiltin("bool", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 { + return pyFalse + } + return pyBool(pyTruth(args[0])) + }) +} + +func makeBuiltinList() *PyBuiltin { + return makeBuiltin("list", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 { + return pyList(nil) + } + items := collectIterable(args[0]) + return pyList(items) + }) +} + +func makeBuiltinDict() *PyBuiltin { + return makeBuiltin("dict", func(args []Object, kwargs map[string]Object) Object { + d := pyDict() + if len(args) > 0 { + switch v := args[0].(type) { + case *PyDict: + for i, k := range v.keys { + d.set(k, v.vals[i]) + } + default: + // Assume iterable of (key, value) pairs + items := collectIterable(args[0]) + for _, item := range items { + pair, ok := item.(*PyTuple) + if !ok || len(pair.items) != 2 { + raiseValueError("dictionary update sequence element is not a 2-tuple") + } + d.set(pair.items[0], pair.items[1]) + } + } + } + for k, v := range kwargs { + d.set(pyStr(k), v) + } + return d + }) +} + +func makeBuiltinTuple() *PyBuiltin { + return makeBuiltin("tuple", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 { + return pyTuple(nil) + } + items := collectIterable(args[0]) + return pyTuple(items) + }) +} + +func makeBuiltinSet() *PyBuiltin { + return makeBuiltin("set", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 { + s := &PySet{items: make(map[any]Object)} + return s + } + items := collectIterable(args[0]) + s, err := pySet(items) + if err != nil { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, "%v", err)}) + } + return s + }) 
+} + +func makeBuiltinFrozenset() *PyBuiltin { + return makeBuiltin("frozenset", func(args []Object, kwargs map[string]Object) Object { + s := &PyFrozenSet{items: make(map[any]Object)} + if len(args) == 0 { + return s + } + items := collectIterable(args[0]) + for _, item := range items { + k, err := hashKey(item) + if err != nil { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, "unhashable type: '%s'", item.pyType().Name)}) + } + s.items[k] = item + } + return s + }) +} + +func makeBuiltinRepr() *PyBuiltin { + return makeBuiltin("repr", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("repr() takes exactly 1 argument") + } + return pyStr(args[0].pyRepr()) + }) +} + +func makeBuiltinHash() *PyBuiltin { + return makeBuiltin("hash", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("hash() takes exactly 1 argument") + } + k, err := hashKey(args[0]) + if err != nil { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, "unhashable type: '%s'", args[0].pyType().Name)}) + } + // Convert to a stable int + switch v := k.(type) { + case int64: + return pyInt(v) + case float64: + return pyInt(int64(v)) + case string: + // simple hash + h := int64(0) + for _, c := range []byte(v) { + h = h*31 + int64(c) + } + return pyInt(h) + case nil: + return pyInt(0) + } + return pyInt(0) + }) +} + +func makeBuiltinId() *PyBuiltin { + return makeBuiltin("id", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("id() takes exactly 1 argument") + } + // Return a stable per-object identifier. Using the pointer stored + // in the interface value (args[0]) rather than &args[0] (the slice + // slot address) ensures the same object always returns the same id. 
+ id := fmt.Sprintf("%p", args[0]) + // Parse hex pointer address + if len(id) > 2 { + n, err := strconv.ParseInt(id[2:], 16, 64) + if err == nil { + return pyInt(n) + } + } + return pyInt(0) + }) +} + +func makeBuiltinCallable() *PyBuiltin { + return makeBuiltin("callable", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("callable() takes exactly 1 argument") + } + return pyBool(isCallable(args[0])) + }) +} + +func isCallable(obj Object) bool { + switch obj.(type) { + case *PyFunction, *PyBuiltin, *PyBoundMethod, *PyClass: + return true + case *PyInstance: + inst := obj.(*PyInstance) + _, ok := inst.lookupMethod("__call__") + return ok + } + return false +} + +func makeBuiltinNext() *PyBuiltin { + return makeBuiltin("next", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 || len(args) > 2 { + raiseTypeError("next() takes 1 or 2 arguments (%d given)", len(args)) + } + val, ok := nextFromIterable(args[0]) + if !ok { + if len(args) == 2 { + return args[1] + } + panic(exceptionSignal{exc: newException(ExcStopIteration)}) + } + return val + }) +} + +func makeBuiltinIter() *PyBuiltin { + return makeBuiltin("iter", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("iter() requires at least 1 argument") + } + obj := args[0] + // Return an appropriate iterator + switch v := obj.(type) { + case *PyList: + return &PyListIter{items: v.items} + case *PyTuple: + return &PyListIter{items: v.items} + case *PyStr: + runes := []rune(v.v) + items := make([]Object, len(runes)) + for i, r := range runes { + items[i] = pyStr(string(r)) + } + return &PyListIter{items: items} + case *PyRange: + return &rangeIter{r: v, cur: v.start} + case *PyDict: + keys := make([]Object, len(v.keys)) + copy(keys, v.keys) + return &PyDictKeyIter{keys: keys} + case *PySet: + items := make([]Object, 0, len(v.items)) + for _, item := range v.items { + items = append(items, item) + } + return 
&PyListIter{items: items} + case *rangeIter, *PyMapIter, *PyFilterIter, *PyZipIter, *PyEnumerateIter, *PyReversedIter, *PyListIter, *PyDictKeyIter, *PyGenerator: + return obj + case *PyInstance: + if fn, ok2 := v.lookupMethod("__iter__"); ok2 { + return callObject(fn, []Object{v}, nil) + } + } + raiseTypeError("'%s' object is not iterable", obj.pyType().Name) + return nil + }) +} + +func makeBuiltinInput(opts *RunOpts) *PyBuiltin { + return makeBuiltin("input", func(args []Object, kwargs map[string]Object) Object { + if len(args) > 0 { + fmt.Fprint(opts.Stdout, args[0].pyStr()) + } + if opts.Stdin == nil || opts.stdinReader == nil { + return pyStr("") + } + // opts.stdinReader is a single persistent bufio.Reader (initialised by + // runInternal) so read-ahead bytes are not dropped between input() calls. + // The underlying reader is already wrapped in a global LimitReader. + line, err := opts.stdinReader.ReadString('\n') + if err != nil && err != io.EOF { + panic(exceptionSignal{exc: newExceptionf(ExcOSError, "input error: %v", err)}) + } + line = strings.TrimRight(line, "\n") + line = strings.TrimRight(line, "\r") + return pyStr(line) + }) +} + +func makeBuiltinVars() *PyBuiltin { + return makeBuiltin("vars", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 { + // Return current locals — need scope context; return empty dict for now + return pyDict() + } + obj := args[0] + switch v := obj.(type) { + case *PyInstance: + d := pyDict() + for k, val := range v.Dict { + d.set(pyStr(k), val) + } + return d + case *PyModule: + d := pyDict() + for k, val := range v.Dict { + d.set(pyStr(k), val) + } + return d + case *PyClass: + d := pyDict() + for k, val := range v.Dict { + d.set(pyStr(k), val) + } + return d + } + raiseTypeError("vars() argument must have __dict__ attribute") + return nil + }) +} + +func makeBuiltinDir() *PyBuiltin { + return makeBuiltin("dir", func(args []Object, kwargs map[string]Object) Object { + var names []string + if 
len(args) > 0 { + obj := args[0] + switch v := obj.(type) { + case *PyInstance: + for k := range v.Dict { + names = append(names, k) + } + for _, cls := range v.Class.MRO { + for k := range cls.Dict { + names = append(names, k) + } + } + case *PyModule: + for k := range v.Dict { + names = append(names, k) + } + case *PyClass: + for k := range v.Dict { + names = append(names, k) + } + } + } + // Deduplicate and sort + seen := make(map[string]bool) + result := make([]Object, 0) + for _, n := range names { + if !seen[n] { + seen[n] = true + result = append(result, pyStr(n)) + } + } + sortList(result, nil, false) + return pyList(result) + }) +} + +func makeBuiltinFormat() *PyBuiltin { + return makeBuiltin("format", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("format() requires at least 1 argument") + } + val := args[0] + spec := "" + if len(args) > 1 { + spec = mustStr(args[1], "format") + } + s := val.pyStr() + if spec != "" { + s = applyFormatSpec(s, val, spec) + } + return pyStr(s) + }) +} + +func makeBuiltinBytes() *PyBuiltin { + return makeBuiltin("bytes", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 { + return pyBytes([]byte{}) + } + switch v := args[0].(type) { + case *PyInt: + n, _ := v.int64() + if n < 0 { + raiseValueError("bytes length must be >= 0") + } + if n > maxRepeatBytes { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "bytes() size %d exceeds limit (%d)", n, maxRepeatBytes)}) + } + return pyBytes(make([]byte, n)) + case *PyStr: + // Requires encoding + enc := "utf-8" + if len(args) > 1 { + enc = strings.ToLower(mustStr(args[1], "bytes")) + } + _ = enc // Only support UTF-8 + return pyBytes([]byte(v.v)) + case *PyBytes: + cp := make([]byte, len(v.v)) + copy(cp, v.v) + return pyBytes(cp) + default: + // Try iterable of ints + items := collectIterable(args[0]) + b := make([]byte, len(items)) + for i, item := range items { + n := toIntVal(item) + if n < 0 || n > 
255 { + raiseValueError("bytes must be in range(0, 256)") + } + b[i] = byte(n) + } + return pyBytes(b) + } + }) +} + +func makeBuiltinBytearray() *PyBuiltin { + return makeBuiltin("bytearray", func(args []Object, kwargs map[string]Object) Object { + // Return a mutable bytes-like — for simplicity return PyBytes + if len(args) == 0 { + return pyBytes([]byte{}) + } + switch v := args[0].(type) { + case *PyInt: + n, _ := v.int64() + if n < 0 { + raiseValueError("bytearray() length must be >= 0") + } + if n > maxRepeatBytes { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "bytearray() size %d exceeds limit (%d)", n, maxRepeatBytes)}) + } + return pyBytes(make([]byte, n)) + case *PyStr: + return pyBytes([]byte(v.v)) + case *PyBytes: + cp := make([]byte, len(v.v)) + copy(cp, v.v) + return pyBytes(cp) + default: + items := collectIterable(args[0]) + b := make([]byte, len(items)) + for i, item := range items { + b[i] = byte(toIntVal(item)) + } + return pyBytes(b) + } + }) +} + +func makeBuiltinMemoryview() *PyBuiltin { + return makeBuiltin("memoryview", func(args []Object, kwargs map[string]Object) Object { + raiseTypeError("memoryview() is not supported in this shell") + return nil + }) +} + +func makeBuiltinOpen(opts *RunOpts) *PyBuiltin { + return makeBuiltin("open", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("open() requires at least 1 argument") + } + var path string + switch v := args[0].(type) { + case *PyStr: + path = v.v + case *PyBytes: + path = string(v.v) + default: + raiseTypeError("open() argument 1 must be str, not %s", args[0].pyType().Name) + } + + mode := "r" + if len(args) > 1 { + mode = mustStr(args[1], "open") + } + if v, ok := kwargs["mode"]; ok { + mode = mustStr(v, "open") + } + + // Reject write/append/exclusive modes + for _, ch := range mode { + switch ch { + case 'w', 'a', 'x', '+': + panic(exceptionSignal{exc: newExceptionf(ExcPermissionError, "open() in write mode is not permitted 
in this shell")}) + } + } + + binary := strings.ContainsRune(mode, 'b') + + rc, err := opts.Open(opts.Ctx, path, os.O_RDONLY, 0) + if err != nil { + if os.IsNotExist(err) { + panic(exceptionSignal{exc: newExceptionf(ExcFileNotFoundError, "[Errno 2] No such file or directory: '%s'", path)}) + } + panic(exceptionSignal{exc: newExceptionf(ExcOSError, "cannot open %q: %v", path, err)}) + } + + return &PyFile{rc: rc, name: path, binary: binary} + }) +} + +func makeBuiltinSuper() *PyBuiltin { + return makeBuiltin("super", func(args []Object, kwargs map[string]Object) Object { + // Return a sentinel; eval.go must intercept super() calls inside methods + return &PySuper{} + }) +} + +// PySuper is the sentinel returned by super(). +type PySuper struct { + Class *PyClass + Obj Object +} + +func (s *PySuper) pyType() *PyType { return typeClass } +func (s *PySuper) pyRepr() string { return "" } +func (s *PySuper) pyStr() string { return s.pyRepr() } + +func makeBuiltinObject() *PyBuiltin { + return makeBuiltin("object", func(args []Object, kwargs map[string]Object) Object { + cls := &PyClass{ + Name: "object", + Dict: make(map[string]Object), + } + cls.MRO = []*PyClass{cls} + return &PyInstance{Class: cls, Dict: make(map[string]Object)} + }) +} + +// getAttr retrieves an attribute from an object. 
+func getAttr(obj Object, name string) (Object, bool) { + switch v := obj.(type) { + case *PyInstance: + // Check instance dict first + if val, ok := v.Dict[name]; ok { + return val, true + } + // Then class MRO + for _, cls := range v.Class.MRO { + if val, ok2 := cls.Dict[name]; ok2 { + // Bind if it's a function + if fn, ok3 := val.(*PyFunction); ok3 { + return &PyBoundMethod{Self: v, Func: fn}, true + } + return val, true + } + } + return nil, false + case *PyModule: + if val, ok := v.Dict[name]; ok { + return val, true + } + return nil, false + case *PyClass: + if val, ok := v.Dict[name]; ok { + return val, true + } + // Check base classes + for _, base := range v.MRO[1:] { + if val, ok2 := base.Dict[name]; ok2 { + return val, true + } + } + return nil, false + case *PyStr: + return strGetAttr(v, name) + case *PyList: + return listGetAttr(v, name) + case *PyDict: + return dictGetAttr(v, name) + case *PySet: + return setGetAttr(v, name) + case *PyBytes: + return bytesGetAttr(v, name) + case *PyFile: + return fileGetAttr(v, name) + case *PyException: + // Check dict first + if v.Dict != nil { + if val, ok := v.Dict[name]; ok { + return val, true + } + } + // Common attributes + switch name { + case "args": + return pyTuple(v.Args), true + case "__class__": + return v.ExcClass, true + case "__cause__": + if v.Cause != nil { + return v.Cause, true + } + return pyNone, true + case "__context__": + if v.Context != nil { + return v.Context, true + } + return pyNone, true + } + return nil, false + case *PySuper: + if v.Obj != nil { + // Look up in parent classes + if inst, ok2 := v.Obj.(*PyInstance); ok2 { + // Skip the first class (current) + for i, cls := range inst.Class.MRO { + if i == 0 { + continue + } + if val, ok3 := cls.Dict[name]; ok3 { + if fn, ok4 := val.(*PyFunction); ok4 { + return &PyBoundMethod{Self: inst, Func: fn}, true + } + return val, true + } + _ = cls + } + } + } + return nil, false + case *PyGenerator: + switch name { + case "send": + return 
makeBuiltin("send", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("send() takes exactly one argument") + } + if v.done { + panic(exceptionSignal{exc: newException(ExcStopIteration, nil)}) + } + if !v.awaitingSend { + raiseTypeError("can't send non-None value to a just-started generator") + } + // Send value into generator (unblock its sendCh receive). + select { + case v.sendCh <- args[0]: + case <-v.ctx.Done(): + v.done = true + panic(exceptionSignal{exc: newExceptionf(ExcGeneratorExit, "cancelled")}) + } + v.awaitingSend = false + // Receive the next yielded value. + var ( + val Object + ok bool + ) + select { + case val, ok = <-v.yieldCh: + case <-v.ctx.Done(): + v.done = true + panic(exceptionSignal{exc: newExceptionf(ExcGeneratorExit, "cancelled")}) + } + if !ok { + v.done = true + panic(exceptionSignal{exc: newException(ExcStopIteration, nil)}) + } + v.awaitingSend = true + return val + }), true + case "__next__": + return makeBuiltin("__next__", func(args []Object, kwargs map[string]Object) Object { + if v.done { + panic(exceptionSignal{exc: newException(ExcStopIteration, nil)}) + } + if v.awaitingSend { + select { + case v.sendCh <- pyNone: + case <-v.ctx.Done(): + v.done = true + panic(exceptionSignal{exc: newExceptionf(ExcGeneratorExit, "cancelled")}) + } + v.awaitingSend = false + } + var ( + val Object + ok bool + ) + select { + case val, ok = <-v.yieldCh: + case <-v.ctx.Done(): + v.done = true + panic(exceptionSignal{exc: newExceptionf(ExcGeneratorExit, "cancelled")}) + } + if !ok { + v.done = true + panic(exceptionSignal{exc: newException(ExcStopIteration, nil)}) + } + v.awaitingSend = true + return val + }), true + case "close": + return makeBuiltin("close", func(args []Object, kwargs map[string]Object) Object { + if v.done { + return pyNone + } + v.done = true + // If the generator goroutine is blocked waiting for sendCh, closing + // sendCh unblocks it so it can observe !ok and exit cleanly. 
+ close(v.sendCh) + // If the generator is blocked waiting to yield a value, drain it so + // the goroutine can proceed to its sendCh receive and observe closure. + select { + case <-v.yieldCh: + default: + } + return pyNone + }), true + case "__iter__": + return makeBuiltin("__iter__", func(args []Object, kwargs map[string]Object) Object { + return v + }), true + } + return nil, false + case *PyTuple: + switch name { + case "count": + return makeBuiltin("count", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("count() takes exactly 1 argument") + } + n := 0 + for _, item := range v.items { + if pyEq(item, args[0]) { + n++ + } + } + return pyInt(int64(n)) + }), true + case "index": + return makeBuiltin("index", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("index() requires at least 1 argument") + } + for i, item := range v.items { + if pyEq(item, args[0]) { + return pyInt(int64(i)) + } + } + raiseValueError("tuple.index(x): x not in tuple") + return nil + }), true + } + return nil, false + } + return nil, false +} + +// setAttr sets an attribute on an object. +func setAttr(obj Object, name string, val Object) { + switch v := obj.(type) { + case *PyInstance: + v.Dict[name] = val + case *PyModule: + v.Dict[name] = val + case *PyClass: + v.Dict[name] = val + case *PyException: + if v.Dict == nil { + v.Dict = make(map[string]Object) + } + v.Dict[name] = val + default: + raiseAttributeError(obj.pyType().Name, name) + } +} + +// pyAdd adds two Python objects. 
+func pyAdd(a, b Object) Object { + switch av := a.(type) { + case *PyInt: + switch bv := b.(type) { + case *PyInt: + an := av.toBigInt() + bn := bv.toBigInt() + result := new(big.Int).Add(an, bn) + return pyIntBig(result) + case *PyFloat: + if n, ok := av.int64(); ok { + return pyFloat(float64(n) + bv.v) + } + case *PyBool: + var bi int64 + if bv.v { + bi = 1 + } + result := new(big.Int).Add(av.toBigInt(), big.NewInt(bi)) + return pyIntBig(result) + } + case *PyFloat: + switch bv := b.(type) { + case *PyFloat: + return pyFloat(av.v + bv.v) + case *PyInt: + if n, ok := bv.int64(); ok { + return pyFloat(av.v + float64(n)) + } + } + case *PyBool: + var ai int64 + if av.v { + ai = 1 + } + switch bv := b.(type) { + case *PyBool: + var bi int64 + if bv.v { + bi = 1 + } + return pyInt(ai + bi) + case *PyInt: + result := new(big.Int).Add(big.NewInt(ai), bv.toBigInt()) + return pyIntBig(result) + } + case *PyStr: + if bv, ok := b.(*PyStr); ok { + return pyStr(av.v + bv.v) + } + case *PyList: + if bv, ok := b.(*PyList); ok { + items := make([]Object, len(av.items)+len(bv.items)) + copy(items, av.items) + copy(items[len(av.items):], bv.items) + return pyList(items) + } + case *PyTuple: + if bv, ok := b.(*PyTuple); ok { + items := make([]Object, len(av.items)+len(bv.items)) + copy(items, av.items) + copy(items[len(av.items):], bv.items) + return pyTuple(items) + } + case *PyBytes: + if bv, ok := b.(*PyBytes); ok { + result := make([]byte, len(av.v)+len(bv.v)) + copy(result, av.v) + copy(result[len(av.v):], bv.v) + return pyBytes(result) + } + } + raiseTypeError("unsupported operand type(s) for +: '%s' and '%s'", a.pyType().Name, b.pyType().Name) + return nil +} diff --git a/builtins/python/eval.go b/builtins/python/eval.go new file mode 100644 index 00000000..7d2f9536 --- /dev/null +++ b/builtins/python/eval.go @@ -0,0 +1,2781 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
// Evaluator is the tree-walking evaluator.
//
// One Evaluator executes statements against a single scope. Class bodies are
// run by a child Evaluator that shares ctx, globals, opts and modules with its
// parent but has its own scope.
type Evaluator struct {
	// ctx is polled by checkCtx; cancellation surfaces as KeyboardInterrupt.
	ctx context.Context
	// scope is the scope that assignments and definitions are written into.
	scope *Scope
	// globals is the module-level namespace; it is handed to every function
	// and class scope created by this evaluator.
	globals map[string]Object
	// opts carries the run options passed to newEvaluator.
	opts *RunOpts
	// modules maps a module name to its module object.
	// NOTE(review): presumably consulted by the import statements — confirm.
	modules map[string]*PyModule
	// genState holds the generator channels.
	// NOTE(review): presumably non-nil only while running inside a generator
	// goroutine — confirm.
	genState *genChannels
	// depth is copied into child evaluators.
	// NOTE(review): presumably the call depth checked against maxCallDepth —
	// confirm.
	depth int
	// activeException is the exception re-raised by a bare `raise`.
	activeException *PyException
}
+ return e, func() {} + } + goroutineCallFns.Store(gid, func(fn Object, args []Object, kwargs map[string]Object) Object { + return e.callObject(fn, args, kwargs) + }) + return e, func() { goroutineCallFns.Delete(gid) } +} + +// checkCtx panics with KeyboardInterrupt if the context has been cancelled. +func (e *Evaluator) checkCtx() { + select { + case <-e.ctx.Done(): + panic(exceptionSignal{exc: newExceptionf(ExcKeyboardInterrupt, "interrupted")}) + default: + } +} + +// ---- Statement execution ---- + +// exec dispatches each statement in the list. +func (e *Evaluator) exec(stmts []Stmt) { + for _, s := range stmts { + e.execStmt(s) + } +} + +func (e *Evaluator) execStmt(s Stmt) { + switch n := s.(type) { + case *AssignStmt: + e.execAssign(n) + case *AugAssignStmt: + e.execAugAssign(n) + case *AnnAssignStmt: + e.execAnnAssign(n) + case *ExprStmt: + e.eval(n.Value) + case *IfStmt: + e.execIf(n) + case *WhileStmt: + e.execWhile(n) + case *ForStmt: + e.execFor(n) + case *FuncDef: + e.execFuncDef(n) + case *ClassDef: + e.execClassDef(n) + case *ReturnStmt: + e.execReturn(n) + case *BreakStmt: + panic(controlSignal{kind: ctrlBreak}) + case *ContinueStmt: + panic(controlSignal{kind: ctrlContinue}) + case *PassStmt: + // nothing + case *RaiseStmt: + e.execRaise(n) + case *TryStmt: + e.execTry(n) + case *WithStmt: + e.execWith(n) + case *ImportStmt: + e.execImport(n) + case *ImportFromStmt: + e.execImportFrom(n) + case *GlobalStmt: + e.execGlobal(n) + case *NonlocalStmt: + e.execNonlocal(n) + case *DelStmt: + e.execDel(n) + case *AssertStmt: + e.execAssert(n) + } +} + +func (e *Evaluator) execAssign(n *AssignStmt) { + val := e.eval(n.Value) + for _, target := range n.Targets { + e.assign(target, val) + } +} + +func (e *Evaluator) execAugAssign(n *AugAssignStmt) { + current := e.eval(n.Target) + rhs := e.eval(n.Value) + // Strip trailing '=' from augmented operator (e.g. "+=" → "+"). 
+ op := strings.TrimSuffix(n.Op, "=") + result := e.applyBinOp(op, current, rhs) + e.assign(n.Target, result) +} + +func (e *Evaluator) execAnnAssign(n *AnnAssignStmt) { + if n.Value != nil { + val := e.eval(n.Value) + e.assign(n.Target, val) + } +} + +func (e *Evaluator) execIf(n *IfStmt) { + if pyTruth(e.eval(n.Test)) { + e.exec(n.Body) + } else { + e.exec(n.Orelse) + } +} + +func (e *Evaluator) execWhile(n *WhileStmt) { + for { + e.checkCtx() + if !pyTruth(e.eval(n.Test)) { + break + } + brk := e.execLoopBody(n.Body) + if brk { + return + } + } + e.exec(n.Orelse) +} + +func (e *Evaluator) execFor(n *ForStmt) { + items := e.iterateObj(e.eval(n.Iter)) + for _, item := range items { + e.checkCtx() + e.assign(n.Target, item) + brk := e.execLoopBody(n.Body) + if brk { + return + } + } + e.exec(n.Orelse) +} + +// execLoopBody runs the body, returning true if a break was hit. +func (e *Evaluator) execLoopBody(body []Stmt) (brk bool) { + defer func() { + r := recover() + if r == nil { + return + } + if sig, ok := r.(controlSignal); ok { + switch sig.kind { + case ctrlBreak: + brk = true + return + case ctrlContinue: + return + } + } + panic(r) + }() + e.exec(body) + return false +} + +func (e *Evaluator) execFuncDef(n *FuncDef) { + // Evaluate defaults in current scope + defaults := make([]Object, len(n.Args.Defaults)) + for i, d := range n.Args.Defaults { + defaults[i] = e.eval(d) + } + kwDefaults := make(map[string]Object) + for i, kw := range n.Args.KwOnly { + if i < len(n.Args.KwDefaults) && n.Args.KwDefaults[i] != nil { + kwDefaults[kw] = e.eval(n.Args.KwDefaults[i]) + } + } + + fn := &PyFunction{ + Name: n.Name, + Args: n.Args, + Body: n.Body, + Closure: e.scope, + Globals: e.globals, + Defaults: defaults, + KwDefaults: kwDefaults, + IsGen: n.IsGen, + } + + // Apply decorators in reverse order + var obj Object = fn + for i := len(n.Decorators) - 1; i >= 0; i-- { + dec := e.eval(n.Decorators[i]) + obj = e.callObject(dec, []Object{obj}, nil) + } + + 
e.scope.set(n.Name, obj) +} + +// objectClass is the implicit base class for all user-defined classes. +var objectClass = &PyClass{Name: "object", Dict: make(map[string]Object)} + +func init() { + objectClass.MRO = []*PyClass{objectClass} +} + +func (e *Evaluator) execClassDef(n *ClassDef) { + // Resolve base classes before executing body + bases := make([]*PyClass, 0, len(n.Bases)) + for _, b := range n.Bases { + bObj := e.eval(b) + switch bc := bObj.(type) { + case *PyClass: + bases = append(bases, bc) + default: + raiseTypeError("bases must be classes, not %s", bObj.pyType().Name) + } + } + if len(bases) == 0 { + bases = []*PyClass{objectClass} + } + + // Execute class body in a new scope + classScope := newFunctionScope(e.scope, e.globals, n.Name) + classScope.class = &PyClass{Name: n.Name} // placeholder for __class__ ref + + child := &Evaluator{ + ctx: e.ctx, + scope: classScope, + globals: e.globals, + opts: e.opts, + modules: e.modules, + depth: e.depth, + } + // Propagate callObject binding + child.exec(n.Body) + + // Collect class dict + classDict := make(map[string]Object, len(classScope.vars)) + for k, v := range classScope.vars { + classDict[k] = v + } + + cls := &PyClass{Name: n.Name, Bases: bases, Dict: classDict} + cls.MRO = computeMRO(cls) + + // Bind __class__ in methods so super() works + classScope.class = cls + + // Apply decorators + var obj Object = cls + for i := len(n.Decorators) - 1; i >= 0; i-- { + dec := e.eval(n.Decorators[i]) + obj = e.callObject(dec, []Object{obj}, nil) + } + + e.scope.set(n.Name, obj) +} + +func (e *Evaluator) execReturn(n *ReturnStmt) { + var val Object = pyNone + if n.Value != nil { + val = e.eval(n.Value) + } + panic(controlSignal{kind: ctrlReturn, value: val}) +} + +func (e *Evaluator) execRaise(n *RaiseStmt) { + if n.Exc == nil { + // bare raise — re-raise active exception + if e.activeException != nil { + exc := e.activeException + if n.Cause != nil { + cause := e.eval(n.Cause) + if ce, ok := 
cause.(*PyException); ok { + exc.Cause = ce + } + } + panic(exceptionSignal{exc: exc}) + } + panic(exceptionSignal{exc: newExceptionf(ExcRuntimeError, "No active exception to re-raise")}) + } + + excVal := e.eval(n.Exc) + var exc *PyException + switch v := excVal.(type) { + case *PyException: + exc = v + case *PyClass: + // Bare class raise: raise ValueError → instantiate with no args + exc = newException(v) + default: + raiseTypeError("exceptions must derive from BaseException") + } + + if n.Cause != nil { + causeVal := e.eval(n.Cause) + switch cv := causeVal.(type) { + case *PyException: + exc.Cause = cv + case *PyClass: + exc.Cause = newException(cv) + } + } + + panic(exceptionSignal{exc: exc}) +} + +func (e *Evaluator) execTry(n *TryStmt) { + var handlerPanic interface{} + + // Outer defer runs finally block + defer func() { + if len(n.Finally) > 0 { + r := recover() + e.exec(n.Finally) + if r != nil { + panic(r) + } + } + }() + + // Inner function handles except clauses + func() { + defer func() { + r := recover() + if r == nil { + return + } + sig, ok := r.(exceptionSignal) + if !ok { + handlerPanic = r + return + } + + // Try each except handler + for _, h := range n.Handlers { + if e.handlerMatches(sig.exc, h) { + prevExc := e.activeException + e.activeException = sig.exc + if h.Name != "" { + e.scope.set(h.Name, sig.exc) + } + defer func() { + e.activeException = prevExc + if h.Name != "" { + e.scope.delete(h.Name) + } + }() + e.exec(h.Body) + return + } + } + // No handler matched — re-panic + handlerPanic = sig + }() + e.exec(n.Body) + // else clause runs only if no exception + e.exec(n.Orelse) + }() + + if handlerPanic != nil { + panic(handlerPanic) + } +} + +func (e *Evaluator) handlerMatches(exc *PyException, h *ExceptHandler) bool { + if h.Type == nil { + return true // bare except + } + typeVal := e.eval(h.Type) + switch tv := typeVal.(type) { + case *PyClass: + return exceptionMatchesClass(exc, tv) + case *PyTuple: + for _, item := range tv.items { 
+ if cls, ok := item.(*PyClass); ok { + if exceptionMatchesClass(exc, cls) { + return true + } + } + } + } + return false +} + +func (e *Evaluator) execWith(n *WithStmt) { + type ctxEntry struct { + mgr Object + optVar Expr + } + + // Enter each context manager in order. If __enter__ raises, unwind all + // already-entered managers before propagating the exception (matching + // CPython's behaviour: previously entered managers must still run __exit__). + entries := make([]ctxEntry, 0, len(n.Items)) + var enterPanic interface{} + func() { + defer func() { + enterPanic = recover() + }() + for _, item := range n.Items { + mgr := e.eval(item.CtxExpr) + entered := e.callMethod(mgr, "__enter__", nil, nil) + entries = append(entries, ctxEntry{mgr: mgr, optVar: item.OptVar}) + if item.OptVar != nil { + e.assign(item.OptVar, entered) + } + } + }() + if enterPanic != nil { + // Unwind already-entered managers in reverse order with (None, None, None). + for i := len(entries) - 1; i >= 0; i-- { + e.callMethod(entries[i].mgr, "__exit__", []Object{pyNone, pyNone, pyNone}, nil) + } + panic(enterPanic) + } + + var bodyPanic interface{} + func() { + defer func() { + bodyPanic = recover() + }() + e.exec(n.Body) + }() + + // Call __exit__ for each context manager in reverse order. + // Once a manager suppresses the exception (returns truthy), subsequent + // outer managers must receive (None, None, None), not the original + // exception — matching CPython semantics. + for i := len(entries) - 1; i >= 0; i-- { + mgr := entries[i].mgr + var result Object + if bodyPanic != nil { + if sig, ok := bodyPanic.(exceptionSignal); ok { + // Pass (type, value, traceback): exc_type is the class, exc_val is + // the instance, traceback is None (we don't model tb objects). 
+ result = e.callMethod(mgr, "__exit__", []Object{sig.exc.ExcClass, sig.exc, pyNone}, nil) + } else { + result = e.callMethod(mgr, "__exit__", []Object{pyNone, pyNone, pyNone}, nil) + } + } else { + result = e.callMethod(mgr, "__exit__", []Object{pyNone, pyNone, pyNone}, nil) + } + if pyTruth(result) { + // Exception suppressed: clear bodyPanic so outer managers in this + // same with-chain receive (None, None, None) as required by Python. + bodyPanic = nil + } + } + + if bodyPanic != nil { + panic(bodyPanic) + } +} + +func (e *Evaluator) execImport(n *ImportStmt) { + for _, name := range n.Names { + mod, found := loadModule(name.Name, e.opts) + if !found { + // Check cache + if cached, ok := e.modules[name.Name]; ok { + mod = cached + } else { + panic(exceptionSignal{exc: newExceptionf(ExcImportError, "No module named '%s'", name.Name)}) + } + } + if mod != nil { + e.modules[name.Name] = mod + } + + bindName := name.Name + if name.Alias != "" { + bindName = name.Alias + } else { + // For "import a.b", bind the top-level name + dotIdx := 0 + for dotIdx < len(bindName) && bindName[dotIdx] != '.' 
{ + dotIdx++ + } + bindName = bindName[:dotIdx] + } + if mod != nil { + e.scope.set(bindName, mod) + } + } +} + +func (e *Evaluator) execImportFrom(n *ImportFromStmt) { + mod, found := loadModule(n.Module, e.opts) + if !found { + if cached, ok := e.modules[n.Module]; ok { + mod = cached + } else { + panic(exceptionSignal{exc: newExceptionf(ExcImportError, "No module named '%s'", n.Module)}) + } + } + if mod != nil { + e.modules[n.Module] = mod + } + + if len(n.Names) == 1 && n.Names[0].Name == "*" { + // Star import + if mod != nil { + for k, v := range mod.Dict { + e.scope.set(k, v) + } + } + return + } + + for _, name := range n.Names { + if mod == nil { + panic(exceptionSignal{exc: newExceptionf(ExcImportError, "cannot import name '%s' from '%s'", name.Name, n.Module)}) + } + val, ok := mod.Dict[name.Name] + if !ok { + panic(exceptionSignal{exc: newExceptionf(ExcImportError, "cannot import name '%s' from '%s'", name.Name, n.Module)}) + } + bindName := name.Name + if name.Alias != "" { + bindName = name.Alias + } + e.scope.set(bindName, val) + } +} + +func (e *Evaluator) execGlobal(n *GlobalStmt) { + if e.scope.globalNames == nil { + e.scope.globalNames = make(map[string]bool) + } + for _, name := range n.Names { + e.scope.globalNames[name] = true + } +} + +func (e *Evaluator) execNonlocal(n *NonlocalStmt) { + if e.scope.nonlocalNames == nil { + e.scope.nonlocalNames = make(map[string]bool) + } + for _, name := range n.Names { + e.scope.nonlocalNames[name] = true + } +} + +func (e *Evaluator) execDel(n *DelStmt) { + for _, target := range n.Targets { + e.delTarget(target) + } +} + +func (e *Evaluator) delTarget(target Expr) { + switch t := target.(type) { + case *NameExpr: + if !e.scope.delete(t.Id) { + // Check globals + if _, ok := e.globals[t.Id]; ok { + delete(e.globals, t.Id) + } else { + raiseNameError(t.Id) + } + } + case *AttributeExpr: + obj := e.eval(t.Value) + switch v := obj.(type) { + case *PyInstance: + delete(v.Dict, t.Attr) + case *PyClass: + 
delete(v.Dict, t.Attr) + default: + raiseAttributeError(obj.pyType().Name, t.Attr) + } + case *SubscriptExpr: + obj := e.eval(t.Value) + key := e.eval(t.Slice) + e.delItem(obj, key) + case *TupleExpr: + for _, elt := range t.Elts { + e.delTarget(elt) + } + case *ListExpr: + for _, elt := range t.Elts { + e.delTarget(elt) + } + } +} + +func (e *Evaluator) delItem(obj Object, key Object) { + switch v := obj.(type) { + case *PyDict: + if !v.del(key) { + raiseKeyError(key) + } + case *PyList: + idx := int(toIntVal(key)) + if idx < 0 { + idx = len(v.items) + idx + } + if idx < 0 || idx >= len(v.items) { + raiseIndexError("list assignment index out of range") + } + v.items = append(v.items[:idx], v.items[idx+1:]...) + default: + // Try __delitem__ + if inst, ok := obj.(*PyInstance); ok { + if fn, ok2 := inst.lookupMethod("__delitem__"); ok2 { + e.callObject(fn, []Object{inst, key}, nil) + return + } + } + raiseTypeError("'%s' object doesn't support item deletion", obj.pyType().Name) + } +} + +func (e *Evaluator) execAssert(n *AssertStmt) { + if !pyTruth(e.eval(n.Test)) { + var msg Object = pyNone + if n.Msg != nil { + msg = e.eval(n.Msg) + } + if msg == pyNone { + panic(exceptionSignal{exc: newException(ExcAssertionError)}) + } + panic(exceptionSignal{exc: newExceptionf(ExcAssertionError, "%s", msg.pyStr())}) + } +} + +// ---- Expression evaluation ---- + +// eval evaluates an expression node and returns the result. 
+func (e *Evaluator) eval(node Node) Object { + if node == nil { + return pyNone + } + switch n := node.(type) { + case *BinOp: + return e.evalBinOp(n) + case *UnaryOp: + return e.evalUnaryOp(n) + case *BoolOp: + return e.evalBoolOp(n) + case *Compare: + return e.evalCompare(n) + case *CallExpr: + return e.evalCall(n) + case *AttributeExpr: + return e.evalAttribute(n) + case *SubscriptExpr: + return e.evalSubscript(n) + case *SliceExpr: + return e.evalSlice(n) + case *NameExpr: + return e.evalName(n) + case *Constant: + return e.evalConstant(n) + case *ListExpr: + return e.evalList(n) + case *TupleExpr: + return e.evalTuple(n) + case *DictExpr: + return e.evalDict(n) + case *SetExpr: + return e.evalSet(n) + case *IfExp: + return e.evalIfExp(n) + case *Lambda: + return e.evalLambda(n) + case *ListComp: + return e.evalListComp(n) + case *DictComp: + return e.evalDictComp(n) + case *SetComp: + return e.evalSetComp(n) + case *GeneratorExp: + return e.evalGeneratorExp(n) + case *Yield: + return e.evalYield(n) + case *YieldFrom: + return e.evalYieldFrom(n) + case *Starred: + // Starred outside of assignment context: evaluate inner value + return e.eval(n.Value) + } + return pyNone +} + +func (e *Evaluator) evalBinOp(n *BinOp) Object { + left := e.eval(n.Left) + // Short-circuit-safe: right is evaluated after left + right := e.eval(n.Right) + return e.applyBinOp(n.Op, left, right) +} + +func (e *Evaluator) applyBinOp(op string, left, right Object) Object { + switch op { + case "+": + return pyAdd(left, right) + case "-": + // set - set = difference + if ls, ok := left.(*PySet); ok { + if rs, ok2 := right.(*PySet); ok2 { + result := &PySet{items: make(map[any]Object)} + for k, item := range ls.items { + if _, ok3 := rs.items[k]; !ok3 { + result.items[k] = item + } + } + return result + } + } + return e.numericBinOp(op, left, right) + case "*": + return e.mulOp(left, right) + case "/": + return e.divOp(left, right) + case "//": + return e.floorDivOp(left, right) + case "%": 
+ return e.modOp(left, right) + case "**": + return e.powOp(left, right) + case "&": + return e.bitwiseOp(op, left, right) + case "|": + return e.bitwiseOrOp(left, right) + case "^": + return e.bitwiseOp(op, left, right) + case "<<": + return e.bitwiseOp(op, left, right) + case ">>": + return e.bitwiseOp(op, left, right) + case "@": + // matmul: try __matmul__ + if inst, ok := left.(*PyInstance); ok { + if fn, ok2 := inst.lookupMethod("__matmul__"); ok2 { + return e.callObject(fn, []Object{inst, right}, nil) + } + } + raiseTypeError("unsupported operand type(s) for @: '%s' and '%s'", left.pyType().Name, right.pyType().Name) + } + raiseTypeError("unsupported operator: %s", op) + return nil +} + +func (e *Evaluator) numericBinOp(op string, left, right Object) Object { + // Normalize bools to int + left = normBool(left) + right = normBool(right) + + switch lv := left.(type) { + case *PyInt: + switch rv := right.(type) { + case *PyInt: + la, ra := lv.toBigInt(), rv.toBigInt() + var result *big.Int + switch op { + case "-": + result = new(big.Int).Sub(la, ra) + default: + raiseTypeError("unsupported int op %s", op) + } + return pyIntBig(result) + case *PyFloat: + if n, ok := lv.int64(); ok { + switch op { + case "-": + return pyFloat(float64(n) - rv.v) + } + } + } + case *PyFloat: + rf := toFloatVal(right) + switch op { + case "-": + return pyFloat(lv.v - rf) + } + } + raiseTypeError("unsupported operand type(s) for %s: '%s' and '%s'", op, left.pyType().Name, right.pyType().Name) + return nil +} + +func (e *Evaluator) mulOp(left, right Object) Object { + // str * int, int * str, list * int, int * list + switch lv := left.(type) { + case *PyStr: + if n, ok := toOptInt(right); ok { + if n <= 0 { + return pyStr("") + } + checkRepeatBytesLimit(len(lv.v), n) + result := make([]byte, 0, len(lv.v)*int(n)) + for i := int64(0); i < n; i++ { + result = append(result, lv.v...) 
+ } + return pyStr(string(result)) + } + case *PyList: + if n, ok := toOptInt(right); ok { + if n <= 0 { + return pyList(nil) + } + checkRepeatItemsLimit(len(lv.items), n) + items := make([]Object, 0, len(lv.items)*int(n)) + for i := int64(0); i < n; i++ { + items = append(items, lv.items...) + } + return pyList(items) + } + case *PyTuple: + if n, ok := toOptInt(right); ok { + if n <= 0 { + return pyTuple(nil) + } + checkRepeatItemsLimit(len(lv.items), n) + items := make([]Object, 0, len(lv.items)*int(n)) + for i := int64(0); i < n; i++ { + items = append(items, lv.items...) + } + return pyTuple(items) + } + case *PyBytes: + if n, ok := toOptInt(right); ok { + if n <= 0 { + return pyBytes(nil) + } + checkRepeatBytesLimit(len(lv.v), n) + result := make([]byte, 0, len(lv.v)*int(n)) + for i := int64(0); i < n; i++ { + result = append(result, lv.v...) + } + return pyBytes(result) + } + case *PyInt, *PyBool: + // int * str, int * list, etc. + n := toIntVal(left) + switch rv := right.(type) { + case *PyStr: + if n <= 0 { + return pyStr("") + } + checkRepeatBytesLimit(len(rv.v), n) + result := make([]byte, 0, len(rv.v)*int(n)) + for i := int64(0); i < n; i++ { + result = append(result, rv.v...) + } + return pyStr(string(result)) + case *PyList: + if n <= 0 { + return pyList(nil) + } + checkRepeatItemsLimit(len(rv.items), n) + items := make([]Object, 0, len(rv.items)*int(n)) + for i := int64(0); i < n; i++ { + items = append(items, rv.items...) + } + return pyList(items) + case *PyTuple: + if n <= 0 { + return pyTuple(nil) + } + checkRepeatItemsLimit(len(rv.items), n) + items := make([]Object, 0, len(rv.items)*int(n)) + for i := int64(0); i < n; i++ { + items = append(items, rv.items...) + } + return pyTuple(items) + case *PyBytes: + if n <= 0 { + return pyBytes(nil) + } + checkRepeatBytesLimit(len(rv.v), n) + result := make([]byte, 0, len(rv.v)*int(n)) + for i := int64(0); i < n; i++ { + result = append(result, rv.v...) 
+ } + return pyBytes(result) + case *PyInt, *PyBool, *PyFloat: + // numeric * numeric + return e.numericMul(left, right) + } + case *PyFloat: + return e.numericMul(left, right) + } + // Fall back to numeric mul + return e.numericMul(left, right) +} + +// checkRepeatBytesLimit raises MemoryError if repeating unitLen bytes n times +// would exceed maxRepeatBytes. unitLen==0 is always safe (empty string/bytes). +func checkRepeatBytesLimit(unitLen int, n int64) { + if unitLen > 0 && n > maxRepeatBytes/int64(unitLen) { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "repeated string/bytes is too large")}) + } +} + +// checkRepeatItemsLimit raises MemoryError if repeating unitLen items n times +// would produce more than maxRepeatBytes/8 items (each Object pointer is 8 bytes). +func checkRepeatItemsLimit(unitLen int, n int64) { + const maxItems = maxRepeatBytes / 8 // ~128k objects + if unitLen > 0 && n > maxItems/int64(unitLen) { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "repeated list/tuple is too large")}) + } +} + +func (e *Evaluator) numericMul(left, right Object) Object { + left = normBool(left) + right = normBool(right) + switch lv := left.(type) { + case *PyInt: + switch rv := right.(type) { + case *PyInt: + result := new(big.Int).Mul(lv.toBigInt(), rv.toBigInt()) + return pyIntBig(result) + case *PyFloat: + if n, ok := lv.int64(); ok { + return pyFloat(float64(n) * rv.v) + } + } + case *PyFloat: + switch rv := right.(type) { + case *PyFloat: + return pyFloat(lv.v * rv.v) + case *PyInt: + if n, ok := rv.int64(); ok { + return pyFloat(lv.v * float64(n)) + } + } + } + raiseTypeError("unsupported operand type(s) for *: '%s' and '%s'", left.pyType().Name, right.pyType().Name) + return nil +} + +func (e *Evaluator) divOp(left, right Object) Object { + // Python 3: / always returns float + lf := toFloatVal(left) + rf := toFloatVal(right) + if rf == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcZeroDivisionError, "division by zero")}) + } 
+ return pyFloat(lf / rf) +} + +func (e *Evaluator) floorDivOp(left, right Object) Object { + left = normBool(left) + right = normBool(right) + switch lv := left.(type) { + case *PyInt: + switch rv := right.(type) { + case *PyInt: + // Use big.Int arithmetic to handle values that don't fit in int64. + la, ra := lv.toBigInt(), rv.toBigInt() + if ra.Sign() == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcZeroDivisionError, "integer division or modulo by zero")}) + } + q, rem := new(big.Int).DivMod(la, ra, new(big.Int)) + // Python floor division: round toward negative infinity. + if rem.Sign() != 0 && (rem.Sign() < 0) != (ra.Sign() < 0) { + q.Sub(q, big.NewInt(1)) + } + return pyIntBig(q) + case *PyFloat: + if n, ok := lv.int64(); ok { + if rv.v == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcZeroDivisionError, "float floor division by zero")}) + } + return pyFloat(math.Floor(float64(n) / rv.v)) + } + // Big-int operand: convert to float + f, _ := new(big.Float).SetInt(lv.toBigInt()).Float64() + if rv.v == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcZeroDivisionError, "float floor division by zero")}) + } + return pyFloat(math.Floor(f / rv.v)) + } + case *PyFloat: + rf := toFloatVal(right) + if rf == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcZeroDivisionError, "float floor division by zero")}) + } + return pyFloat(math.Floor(lv.v / rf)) + } + raiseTypeError("unsupported operand type(s) for //: '%s' and '%s'", left.pyType().Name, right.pyType().Name) + return nil +} + +func (e *Evaluator) modOp(left, right Object) Object { + // str % args: format string + if ls, ok := left.(*PyStr); ok { + return pyStr(strPercent(ls.v, right)) + } + + left = normBool(left) + right = normBool(right) + + switch lv := left.(type) { + case *PyInt: + switch rv := right.(type) { + case *PyInt: + // Use big.Int arithmetic to handle values that don't fit in int64. 
+ la, ra := lv.toBigInt(), rv.toBigInt() + if ra.Sign() == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcZeroDivisionError, "integer division or modulo by zero")}) + } + r := new(big.Int).Mod(la, ra) + // Python: result has same sign as divisor (Mod already does this for big.Int) + return pyIntBig(r) + case *PyFloat: + if n, ok := lv.int64(); ok { + if rv.v == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcZeroDivisionError, "float modulo")}) + } + r := math.Mod(float64(n), rv.v) + if r != 0 && ((r < 0) != (rv.v < 0)) { + r += rv.v + } + return pyFloat(r) + } + // Big-int operand: convert to float + f, _ := new(big.Float).SetInt(lv.toBigInt()).Float64() + if rv.v == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcZeroDivisionError, "float modulo")}) + } + r := math.Mod(f, rv.v) + if r != 0 && ((r < 0) != (rv.v < 0)) { + r += rv.v + } + return pyFloat(r) + } + case *PyFloat: + rf := toFloatVal(right) + if rf == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcZeroDivisionError, "float modulo")}) + } + r := math.Mod(lv.v, rf) + if r != 0 && ((r < 0) != (rf < 0)) { + r += rf + } + return pyFloat(r) + } + raiseTypeError("unsupported operand type(s) for %%: '%s' and '%s'", left.pyType().Name, right.pyType().Name) + return nil +} + +func (e *Evaluator) powOp(left, right Object) Object { + left = normBool(left) + right = normBool(right) + switch lv := left.(type) { + case *PyInt: + switch rv := right.(type) { + case *PyInt: + en, eok := rv.int64() + if eok && en >= 0 { + // Guard against exponents that would produce astronomically large results. + // Analogous to the maxShift cap on <<. Allow up to ~8 Mbit of result. 
+ const maxExpBits = 8 * maxRepeatBytes + baseBits := int64(lv.toBigInt().BitLen()) + if baseBits > 1 && en > maxExpBits/baseBits { + panic(exceptionSignal{exc: newExceptionf(ExcOverflowError, "integer exponentiation result too large")}) + } + result := new(big.Int).Exp(lv.toBigInt(), rv.toBigInt(), nil) + return pyIntBig(result) + } + // Negative exponent → float. Use big.Float for full precision so + // that large bases (e.g. 2**80) are not silently truncated to zero. + bf, _ := new(big.Float).SetInt(lv.toBigInt()).Float64() + ef, _ := new(big.Float).SetInt(rv.toBigInt()).Float64() + return pyFloat(math.Pow(bf, ef)) + case *PyFloat: + // Use big.Float for full precision when the base is a large integer. + bf, _ := new(big.Float).SetInt(lv.toBigInt()).Float64() + return pyFloat(math.Pow(bf, rv.v)) + } + case *PyFloat: + rf := toFloatVal(right) + return pyFloat(math.Pow(lv.v, rf)) + } + raiseTypeError("unsupported operand type(s) for **: '%s' and '%s'", left.pyType().Name, right.pyType().Name) + return nil +} + +func (e *Evaluator) bitwiseOp(op string, left, right Object) Object { + // Set operations: &=intersection, ^=symmetric difference, -=difference + if ls, ok := left.(*PySet); ok { + if rs, ok2 := right.(*PySet); ok2 { + result := &PySet{items: make(map[any]Object)} + switch op { + case "&": + for k, item := range ls.items { + if _, ok3 := rs.items[k]; ok3 { + result.items[k] = item + } + } + case "^": + for k, item := range ls.items { + if _, ok3 := rs.items[k]; !ok3 { + result.items[k] = item + } + } + for k, item := range rs.items { + if _, ok3 := ls.items[k]; !ok3 { + result.items[k] = item + } + } + case "-": + for k, item := range ls.items { + if _, ok3 := rs.items[k]; !ok3 { + result.items[k] = item + } + } + } + return result + } + raiseTypeError("unsupported operand type(s) for %s: 'set' and '%s'", op, right.pyType().Name) + } + + left = normBool(left) + right = normBool(right) + lv, lok := left.(*PyInt) + rv, rok := right.(*PyInt) + if !lok || !rok 
{ + raiseTypeError("unsupported operand type(s) for %s: '%s' and '%s'", op, left.pyType().Name, right.pyType().Name) + } + switch op { + case "&": + result := new(big.Int).And(lv.toBigInt(), rv.toBigInt()) + return pyIntBig(result) + case "^": + result := new(big.Int).Xor(lv.toBigInt(), rv.toBigInt()) + return pyIntBig(result) + case "<<": + rn, rok2 := rv.int64() + if !rok2 || rn < 0 { + panic(exceptionSignal{exc: newExceptionf(ExcValueError, "negative shift count")}) + } + // Cap shift to prevent OOM on huge left shifts (result would exceed output limit anyway). + const maxShift = 1 << 23 // 8 MB worth of bits + if rn > maxShift { + rn = maxShift + } + br := new(big.Int).Lsh(lv.toBigInt(), uint(rn)) + return pyIntBig(br) + case ">>": + rn, rok2 := rv.int64() + if !rok2 || rn < 0 { + panic(exceptionSignal{exc: newExceptionf(ExcValueError, "negative shift count")}) + } + br := new(big.Int).Rsh(lv.toBigInt(), uint(rn)) + return pyIntBig(br) + } + return pyInt(0) +} + +func (e *Evaluator) bitwiseOrOp(left, right Object) Object { + // set | set = union + if ls, ok := left.(*PySet); ok { + if rs, ok2 := right.(*PySet); ok2 { + result := &PySet{items: make(map[any]Object)} + for k, item := range ls.items { + result.items[k] = item + } + for k, item := range rs.items { + result.items[k] = item + } + return result + } + raiseTypeError("unsupported operand type(s) for |: 'set' and '%s'", right.pyType().Name) + } + + left = normBool(left) + right = normBool(right) + + // dict | dict (Python 3.9+) + if ld, ok := left.(*PyDict); ok { + if rd, ok2 := right.(*PyDict); ok2 { + newD := pyDict() + for i, k := range ld.keys { + newD.set(k, ld.vals[i]) + } + for i, k := range rd.keys { + newD.set(k, rd.vals[i]) + } + return newD + } + } + + // set | set + if ls, ok := left.(*PySet); ok { + if rs, ok2 := right.(*PySet); ok2 { + result := &PySet{items: make(map[any]Object)} + for k, v := range ls.items { + result.items[k] = v + } + for k, v := range rs.items { + result.items[k] = v + 
} + return result + } + } + + // int | int + lv, lok := left.(*PyInt) + rv, rok := right.(*PyInt) + if !lok || !rok { + raiseTypeError("unsupported operand type(s) for |: '%s' and '%s'", left.pyType().Name, right.pyType().Name) + } + result := new(big.Int).Or(lv.toBigInt(), rv.toBigInt()) + return pyIntBig(result) +} + +func (e *Evaluator) evalUnaryOp(n *UnaryOp) Object { + operand := e.eval(n.Operand) + switch n.Op { + case "-": + operand = normBool(operand) + switch v := operand.(type) { + case *PyInt: + result := new(big.Int).Neg(v.toBigInt()) + return pyIntBig(result) + case *PyFloat: + return pyFloat(-v.v) + } + raiseTypeError("bad operand type for unary -: '%s'", operand.pyType().Name) + case "+": + operand = normBool(operand) + switch v := operand.(type) { + case *PyInt: + return v + case *PyFloat: + return v + } + raiseTypeError("bad operand type for unary +: '%s'", operand.pyType().Name) + case "~": + operand = normBool(operand) + if v, ok := operand.(*PyInt); ok { + if v.big == nil { + return pyInt(^v.small) + } + result := new(big.Int).Not(v.big) + return pyIntBig(result) + } + raiseTypeError("bad operand type for unary ~: '%s'", operand.pyType().Name) + case "not": + return pyBool(!pyTruth(operand)) + } + return pyNone +} + +func (e *Evaluator) evalBoolOp(n *BoolOp) Object { + if n.Op == "and" { + var result Object = pyTrue + for _, val := range n.Values { + result = e.eval(val) + if !pyTruth(result) { + return result + } + } + return result + } + // or + var result Object = pyFalse + for _, val := range n.Values { + result = e.eval(val) + if pyTruth(result) { + return result + } + } + return result +} + +func (e *Evaluator) evalCompare(n *Compare) Object { + left := e.eval(n.Left) + for i, op := range n.Ops { + right := e.eval(n.Comparators[i]) + if !e.compareTwo(op, left, right) { + return pyFalse + } + left = right + } + return pyTrue +} + +func (e *Evaluator) compareTwo(op string, left, right Object) bool { + switch op { + case "==": + return 
pyEq(left, right) + case "!=": + return !pyEq(left, right) + case "<": + return pyCompare(left, right) < 0 + case "<=": + return pyCompare(left, right) <= 0 + case ">": + return pyCompare(left, right) > 0 + case ">=": + return pyCompare(left, right) >= 0 + case "in": + return e.contains(right, left) + case "not in": + return !e.contains(right, left) + case "is": + return left == right + case "is not": + return left != right + } + return false +} + +func (e *Evaluator) contains(container, item Object) bool { + switch c := container.(type) { + case *PyList: + for _, v := range c.items { + if pyEq(v, item) { + return true + } + } + return false + case *PyTuple: + for _, v := range c.items { + if pyEq(v, item) { + return true + } + } + return false + case *PyStr: + if s, ok := item.(*PyStr); ok { + return len(s.v) == 0 || containsSubstring(c.v, s.v) + } + raiseTypeError("'in ' requires string as left operand, not %s", item.pyType().Name) + case *PyDict: + k, err := hashKey(item) + if err != nil { + return false + } + _, ok := c.index[k] + return ok + case *PySet: + k, err := hashKey(item) + if err != nil { + return false + } + _, ok := c.items[k] + return ok + case *PyFrozenSet: + k, err := hashKey(item) + if err != nil { + return false + } + _, ok := c.items[k] + return ok + case *PyBytes: + if b, ok := item.(*PyBytes); ok { + return bytesContains(c.v, b.v) + } + if n, ok := item.(*PyInt); ok { + v, _ := n.int64() + for _, byt := range c.v { + if int64(byt) == v { + return true + } + } + return false + } + case *PyRange: + items := collectIterable(c) + for _, v := range items { + if pyEq(v, item) { + return true + } + } + return false + case *PyInstance: + if fn, ok2 := c.lookupMethod("__contains__"); ok2 { + result := e.callObject(fn, []Object{c, item}, nil) + return pyTruth(result) + } + } + raiseTypeError("argument of type '%s' is not iterable", container.pyType().Name) + return false +} + +func containsSubstring(s, sub string) bool { + if len(sub) == 0 { + return 
true + } + for i := 0; i <= len(s)-len(sub); i++ { + if s[i:i+len(sub)] == sub { + return true + } + } + return false +} + +func bytesContains(haystack, needle []byte) bool { + if len(needle) == 0 { + return true + } + for i := 0; i <= len(haystack)-len(needle); i++ { + match := true + for j := range needle { + if haystack[i+j] != needle[j] { + match = false + break + } + } + if match { + return true + } + } + return false +} + +func (e *Evaluator) evalCall(n *CallExpr) Object { + fn := e.eval(n.Func) + + // Collect positional args + args := make([]Object, 0, len(n.Args)) + for _, arg := range n.Args { + if st, ok := arg.(*Starred); ok { + expanded := e.iterateObj(e.eval(st.Value)) + args = append(args, expanded...) + } else { + args = append(args, e.eval(arg)) + } + } + + // Collect keyword args + kwargs := make(map[string]Object) + for _, kw := range n.Keywords { + if kw.Arg == "" { + // **unpack + val := e.eval(kw.Value) + if d, ok := val.(*PyDict); ok { + for i, k := range d.keys { + if ks, ok2 := k.(*PyStr); ok2 { + kwargs[ks.v] = d.vals[i] + } + } + } + } else { + kwargs[kw.Arg] = e.eval(kw.Value) + } + } + + // Extra *args (from Starargs field) + for _, sa := range n.Starargs { + expanded := e.iterateObj(e.eval(sa)) + args = append(args, expanded...) 
+ } + + // Extra **kwargs + for _, ka := range n.Kwargs { + val := e.eval(ka) + if d, ok := val.(*PyDict); ok { + for i, k := range d.keys { + if ks, ok2 := k.(*PyStr); ok2 { + kwargs[ks.v] = d.vals[i] + } + } + } + } + + if len(kwargs) == 0 { + kwargs = nil + } + + // Special handling for super() — need to wire up __class__ and self + if isName(n.Func, "super") && len(args) == 0 { + return e.resolveSuper() + } + + return e.callObject(fn, args, kwargs) +} + +func isName(expr Expr, name string) bool { + if ne, ok := expr.(*NameExpr); ok { + return ne.Id == name + } + return false +} + +func (e *Evaluator) resolveSuper() Object { + // Walk scope chain to find __class__ and the first arg + scope := e.scope + for scope != nil { + if cls := scope.class; cls != nil { + // Find 'self' — the first argument of the enclosing function + // Look for it in the scope vars + if self, ok2 := scope.vars["self"]; ok2 { + return &PySuper{Class: cls, Obj: self} + } + // Try parent scope + if scope.parent != nil { + if self, ok2 := scope.parent.vars["self"]; ok2 { + return &PySuper{Class: cls, Obj: self} + } + } + return &PySuper{Class: cls} + } + scope = scope.parent + } + return &PySuper{} +} + +func (e *Evaluator) evalAttribute(n *AttributeExpr) Object { + obj := e.eval(n.Value) + val, ok := getAttr(obj, n.Attr) + if !ok { + raiseAttributeError(obj.pyType().Name, n.Attr) + } + return val +} + +func (e *Evaluator) evalSubscript(n *SubscriptExpr) Object { + obj := e.eval(n.Value) + + // Check if it's a slice + if sl, ok := n.Slice.(*SliceExpr); ok { + return e.getSlice(obj, sl) + } + + key := e.eval(n.Slice) + return e.getItem(obj, key) +} + +func (e *Evaluator) getItem(obj Object, key Object) Object { + switch v := obj.(type) { + case *PyList: + idx := int(toIntVal(key)) + idx = normalizeIndex(idx, len(v.items)) + if idx < 0 || idx >= len(v.items) { + raiseIndexError("list index out of range") + } + return v.items[idx] + case *PyTuple: + idx := int(toIntVal(key)) + idx = 
normalizeIndex(idx, len(v.items)) + if idx < 0 || idx >= len(v.items) { + raiseIndexError("tuple index out of range") + } + return v.items[idx] + case *PyStr: + runes := []rune(v.v) + idx := int(toIntVal(key)) + idx = normalizeIndex(idx, len(runes)) + if idx < 0 || idx >= len(runes) { + raiseIndexError("string index out of range") + } + return pyStr(string(runes[idx])) + case *PyBytes: + idx := int(toIntVal(key)) + idx = normalizeIndex(idx, len(v.v)) + if idx < 0 || idx >= len(v.v) { + raiseIndexError("index out of range") + } + return pyInt(int64(v.v[idx])) + case *PyDict: + val, ok := v.get(key) + if !ok { + raiseKeyError(key) + } + return val + case *PyInstance: + if fn, ok2 := v.lookupMethod("__getitem__"); ok2 { + return e.callObject(fn, []Object{v, key}, nil) + } + raiseTypeError("'%s' object is not subscriptable", v.Class.Name) + } + raiseTypeError("'%s' object is not subscriptable", obj.pyType().Name) + return nil +} + +func (e *Evaluator) evalSlice(n *SliceExpr) Object { + // Returns a PySlice-like tuple: we use a PyTuple with marker + // Actually return a *PySlice object represented as a special object + return e.buildSliceObj(n) +} + +// pySliceObj is a Python slice object (for use as subscript) +type pySliceObj struct { + lower, upper, step Object +} + +func (s *pySliceObj) pyType() *PyType { return typeSlice } +func (s *pySliceObj) pyRepr() string { + return fmt.Sprintf("slice(%v, %v, %v)", s.lower, s.upper, s.step) +} +func (s *pySliceObj) pyStr() string { return s.pyRepr() } + +func (e *Evaluator) buildSliceObj(n *SliceExpr) *pySliceObj { + var lower, upper, step Object = pyNone, pyNone, pyNone + if n.Lower != nil { + lower = e.eval(n.Lower) + } + if n.Upper != nil { + upper = e.eval(n.Upper) + } + if n.Step != nil { + step = e.eval(n.Step) + } + return &pySliceObj{lower: lower, upper: upper, step: step} +} + +func (e *Evaluator) getSlice(obj Object, n *SliceExpr) Object { + sl := e.buildSliceObj(n) + + switch v := obj.(type) { + case *PyList: + 
start, stop, step := resolveSlice(sl, len(v.items)) + return pyList(sliceItems(v.items, start, stop, step)) + case *PyTuple: + start, stop, step := resolveSlice(sl, len(v.items)) + return pyTuple(sliceItems(v.items, start, stop, step)) + case *PyStr: + runes := []rune(v.v) + start, stop, step := resolveSlice(sl, len(runes)) + sliced := sliceRunes(runes, start, stop, step) + return pyStr(string(sliced)) + case *PyBytes: + start, stop, step := resolveSlice(sl, len(v.v)) + sliced := sliceBytes(v.v, start, stop, step) + return pyBytes(sliced) + case *PyInstance: + sliceObj := sl + if fn, ok := v.lookupMethod("__getitem__"); ok { + return e.callObject(fn, []Object{v, sliceObj}, nil) + } + } + raiseTypeError("'%s' object is not subscriptable", obj.pyType().Name) + return nil +} + +func resolveSlice(sl *pySliceObj, length int) (start, stop, step int) { + step = 1 + if sl.step != pyNone && sl.step != nil { + step = int(toIntVal(sl.step)) + } + if step == 0 { + panic(exceptionSignal{exc: newExceptionf(ExcValueError, "slice step cannot be zero")}) + } + + if step > 0 { + start = 0 + stop = length + } else { + start = length - 1 + stop = -length - 1 + } + + if sl.lower != pyNone && sl.lower != nil { + start = int(toIntVal(sl.lower)) + if start < 0 { + start += length + } + } + if sl.upper != pyNone && sl.upper != nil { + stop = int(toIntVal(sl.upper)) + if stop < 0 { + stop += length + } + } + return start, stop, step +} + +func sliceItems(items []Object, start, stop, step int) []Object { + var result []Object + if step > 0 { + for i := start; i < stop && i < len(items); i += step { + if i >= 0 { + result = append(result, items[i]) + } + } + } else { + for i := start; i > stop && i >= 0; i += step { + if i < len(items) { + result = append(result, items[i]) + } + } + } + return result +} + +func sliceRunes(runes []rune, start, stop, step int) []rune { + var result []rune + if step > 0 { + for i := start; i < stop && i < len(runes); i += step { + if i >= 0 { + result = 
append(result, runes[i]) + } + } + } else { + for i := start; i > stop && i >= 0; i += step { + if i < len(runes) { + result = append(result, runes[i]) + } + } + } + return result +} + +func sliceBytes(b []byte, start, stop, step int) []byte { + var result []byte + if step > 0 { + for i := start; i < stop && i < len(b); i += step { + if i >= 0 { + result = append(result, b[i]) + } + } + } else { + for i := start; i > stop && i >= 0; i += step { + if i < len(b) { + result = append(result, b[i]) + } + } + } + return result +} + +func (e *Evaluator) setItem(obj Object, key Object, val Object) { + switch v := obj.(type) { + case *PyList: + // Handle slice assignment + if sl, ok := key.(*pySliceObj); ok { + start, stop, step := resolveSlice(sl, len(v.items)) + if step != 1 { + // Extended slice assignment not fully supported + raiseTypeError("extended slice assignment not supported") + } + newItems := e.iterateObj(val) + if start < 0 { + start = 0 + } + if stop > len(v.items) { + stop = len(v.items) + } + if stop < start { + stop = start + } + result := make([]Object, 0, len(v.items)-(stop-start)+len(newItems)) + result = append(result, v.items[:start]...) + result = append(result, newItems...) + result = append(result, v.items[stop:]...) 
+ v.items = result + return + } + idx := int(toIntVal(key)) + idx = normalizeIndex(idx, len(v.items)) + if idx < 0 || idx >= len(v.items) { + raiseIndexError("list assignment index out of range") + } + v.items[idx] = val + case *PyDict: + v.set(key, val) + case *PyInstance: + if fn, ok2 := v.lookupMethod("__setitem__"); ok2 { + e.callObject(fn, []Object{v, key, val}, nil) + return + } + raiseTypeError("'%s' object does not support item assignment", v.Class.Name) + default: + raiseTypeError("'%s' object does not support item assignment", obj.pyType().Name) + } +} + +func (e *Evaluator) evalName(n *NameExpr) Object { + val, ok := e.scope.get(n.Id) + if !ok { + // Check globals + if val2, ok2 := e.globals[n.Id]; ok2 { + return val2 + } + raiseNameError(n.Id) + } + return val +} + +func (e *Evaluator) evalConstant(n *Constant) Object { + if n.Value == nil { + return pyNone + } + switch v := n.Value.(type) { + case int64: + return pyInt(v) + case float64: + return pyFloat(v) + case string: + return pyStr(v) + case []byte: + return pyBytes(v) + case bool: + return pyBool(v) + } + return pyNone +} + +func (e *Evaluator) evalList(n *ListExpr) Object { + items := make([]Object, 0, len(n.Elts)) + for _, elt := range n.Elts { + if st, ok := elt.(*Starred); ok { + expanded := e.iterateObj(e.eval(st.Value)) + items = append(items, expanded...) + } else { + items = append(items, e.eval(elt)) + } + } + return pyList(items) +} + +func (e *Evaluator) evalTuple(n *TupleExpr) Object { + items := make([]Object, 0, len(n.Elts)) + for _, elt := range n.Elts { + if st, ok := elt.(*Starred); ok { + expanded := e.iterateObj(e.eval(st.Value)) + items = append(items, expanded...) 
+ } else { + items = append(items, e.eval(elt)) + } + } + return pyTuple(items) +} + +func (e *Evaluator) evalDict(n *DictExpr) Object { + d := pyDict() + for i, keyExpr := range n.Keys { + valObj := e.eval(n.Values[i]) + if keyExpr == nil { + // **unpack + if src, ok := valObj.(*PyDict); ok { + for j, k := range src.keys { + d.set(k, src.vals[j]) + } + } + } else { + keyObj := e.eval(keyExpr) + d.set(keyObj, valObj) + } + } + return d +} + +func (e *Evaluator) evalSet(n *SetExpr) Object { + items := make([]Object, 0, len(n.Elts)) + for _, elt := range n.Elts { + items = append(items, e.eval(elt)) + } + s, err := pySet(items) + if err != nil { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, "%v", err)}) + } + return s +} + +func (e *Evaluator) evalIfExp(n *IfExp) Object { + if pyTruth(e.eval(n.Test)) { + return e.eval(n.Body) + } + return e.eval(n.Orelse) +} + +func (e *Evaluator) evalLambda(n *Lambda) Object { + // Evaluate defaults in current scope + defaults := make([]Object, len(n.Args.Defaults)) + for i, d := range n.Args.Defaults { + defaults[i] = e.eval(d) + } + // Lambda body is a single expression, wrap in return + body := []Stmt{&ReturnStmt{Value: n.Body}} + return &PyFunction{ + Name: "", + Args: n.Args, + Body: body, + Closure: e.scope, + Globals: e.globals, + Defaults: defaults, + } +} + +func (e *Evaluator) evalListComp(n *ListComp) Object { + items := e.evalComprehension(n.Elt, n.Generators) + return pyList(items) +} + +func (e *Evaluator) evalSetComp(n *SetComp) Object { + items := e.evalComprehension(n.Elt, n.Generators) + s, err := pySet(items) + if err != nil { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, "%v", err)}) + } + return s +} + +func (e *Evaluator) evalDictComp(n *DictComp) Object { + d := pyDict() + e.evalDictCompHelper(n.Key, n.Value, n.Generators, 0, d) + return d +} + +func (e *Evaluator) evalDictCompHelper(keyExpr, valExpr Expr, gens []*Comprehension, depth int, d *PyDict) { + if depth >= len(gens) { + k := 
e.eval(keyExpr) + v := e.eval(valExpr) + d.set(k, v) + return + } + gen := gens[depth] + items := e.iterateObj(e.eval(gen.Iter)) + for _, item := range items { + e.assign(gen.Target, item) + pass := true + for _, cond := range gen.Ifs { + if !pyTruth(e.eval(cond)) { + pass = false + break + } + } + if pass { + e.evalDictCompHelper(keyExpr, valExpr, gens, depth+1, d) + } + } +} + +func (e *Evaluator) evalComprehension(eltExpr Expr, gens []*Comprehension) []Object { + // Run in a new child scope (list comprehensions have their own scope in Python 3) + childScope := newFunctionScope(e.scope, e.globals, "") + child := &Evaluator{ + ctx: e.ctx, + scope: childScope, + globals: e.globals, + opts: e.opts, + modules: e.modules, + depth: e.depth, + } + + var result []Object + child.evalCompHelper(eltExpr, gens, 0, &result) + return result +} + +func (e *Evaluator) evalCompHelper(eltExpr Expr, gens []*Comprehension, depth int, result *[]Object) { + if depth >= len(gens) { + *result = append(*result, e.eval(eltExpr)) + return + } + gen := gens[depth] + items := e.iterateObj(e.eval(gen.Iter)) + for _, item := range items { + e.assign(gen.Target, item) + pass := true + for _, cond := range gen.Ifs { + if !pyTruth(e.eval(cond)) { + pass = false + break + } + } + if pass { + e.evalCompHelper(eltExpr, gens, depth+1, result) + } + } +} + +func (e *Evaluator) evalGeneratorExp(n *GeneratorExp) Object { + // Eagerly evaluate the first iterator (per Python semantics), create a generator + if len(n.Generators) == 0 { + return &PyGenerator{name: "", sendCh: make(chan Object), yieldCh: make(chan Object), ctx: e.ctx} + } + + // Capture first iterator in current scope + firstIter := e.eval(n.Generators[0].Iter) + + // Create a fake function body that yields from the comprehension + // We implement this as a real generator that runs the comprehension + g := &PyGenerator{ + name: "", + sendCh: make(chan Object, 0), + yieldCh: make(chan Object, 0), + excCh: make(chan *PyException, 1), + ctx: 
e.ctx, + } + + childScope := newFunctionScope(e.scope, e.globals, "") + childEval := &Evaluator{ + ctx: e.ctx, + scope: childScope, + globals: e.globals, + opts: e.opts, + modules: e.modules, + genState: &genChannels{ + sendCh: g.sendCh, + yieldCh: g.yieldCh, + ctx: e.ctx, + }, + } + + // Copy generators but replace first iter with already-evaluated value + gens := make([]*Comprehension, len(n.Generators)) + copy(gens, n.Generators) + firstItems := childEval.iterateObj(firstIter) + + go func() { + // Register this goroutine's callObject so that map/filter/sorted with + // user-defined key functions work correctly inside generator expressions. + if gid, ok := goroutineID(); ok { + goroutineCallFns.Store(gid, func(fn Object, args []Object, kwargs map[string]Object) Object { + return childEval.callObject(fn, args, kwargs) + }) + defer goroutineCallFns.Delete(gid) + } + defer close(g.yieldCh) + defer func() { + r := recover() + if r == nil { + return + } + if sig, ok := r.(exceptionSignal); ok { + if exceptionMatchesClass(sig.exc, ExcStopIteration) { + return + } + if exceptionMatchesClass(sig.exc, ExcGeneratorExit) { + return + } + // Non-StopIteration Python exception: propagate to the caller via excCh. + select { + case g.excCh <- sig.exc: + default: + } + return + } + if _, ok := r.(controlSignal); ok { + // controlSignal for return is normal completion. + return + } + // Real Go panic — re-panic so it is not silently swallowed. 
+ panic(r) + }() + + childEval.evalGenExpHelper(n.Elt, firstItems, gens, 0) + }() + + return g +} + +func (e *Evaluator) evalGenExpHelper(eltExpr Expr, firstItems []Object, gens []*Comprehension, depth int) { + if depth >= len(gens) { + val := e.eval(eltExpr) + select { + case e.genState.yieldCh <- val: + case <-e.ctx.Done(): + return + } + select { + case _, ok := <-e.genState.sendCh: + if !ok { + return + } + case <-e.ctx.Done(): + return + } + return + } + + gen := gens[depth] + var items []Object + if depth == 0 { + items = firstItems + } else { + items = e.iterateObj(e.eval(gen.Iter)) + } + + for _, item := range items { + e.assign(gen.Target, item) + pass := true + for _, cond := range gen.Ifs { + if !pyTruth(e.eval(cond)) { + pass = false + break + } + } + if pass { + e.evalGenExpHelper(eltExpr, nil, gens, depth+1) + } + } +} + +func (e *Evaluator) evalYield(n *Yield) Object { + if e.genState == nil { + raiseTypeError("'yield' outside function") + } + var val Object = pyNone + if n.Value != nil { + val = e.eval(n.Value) + } + select { + case e.genState.yieldCh <- val: + case <-e.ctx.Done(): + panic(exceptionSignal{exc: newExceptionf(ExcKeyboardInterrupt, "cancelled")}) + } + select { + case sent, ok := <-e.genState.sendCh: + if !ok { + panic(exceptionSignal{exc: newExceptionf(ExcGeneratorExit, "generator closed")}) + } + return sent + case <-e.ctx.Done(): + panic(exceptionSignal{exc: newExceptionf(ExcKeyboardInterrupt, "cancelled")}) + } +} + +func (e *Evaluator) evalYieldFrom(n *YieldFrom) Object { + if e.genState == nil { + raiseTypeError("'yield from' outside function") + } + sub := e.eval(n.Value) + for { + val, ok := e.nextFromIter(sub) + if !ok { + break + } + select { + case e.genState.yieldCh <- val: + case <-e.ctx.Done(): + panic(exceptionSignal{exc: newExceptionf(ExcKeyboardInterrupt, "cancelled")}) + } + select { + case _, ok2 := <-e.genState.sendCh: + if !ok2 { + panic(exceptionSignal{exc: newExceptionf(ExcGeneratorExit, "generator closed")}) + } 
+ case <-e.ctx.Done(): + panic(exceptionSignal{exc: newExceptionf(ExcKeyboardInterrupt, "cancelled")}) + } + } + return pyNone +} + +// ---- Function calling ---- + +// callObject dispatches a call to the appropriate handler. +func (e *Evaluator) callObject(fn Object, args []Object, kwargs map[string]Object) Object { + switch f := fn.(type) { + case *PyBuiltin: + if kwargs == nil { + kwargs = map[string]Object{} + } + return f.Fn(args, kwargs) + case *PyFunction: + return e.callFunction(f, args, kwargs) + case *PyBoundMethod: + allArgs := make([]Object, 0, 1+len(args)) + allArgs = append(allArgs, f.Self) + allArgs = append(allArgs, args...) + return e.callFunction(f.Func, allArgs, kwargs) + case *PyClass: + return e.callClass(f, args, kwargs) + case *PyInstance: + // __call__ + if meth, ok := f.lookupMethod("__call__"); ok { + allArgs := make([]Object, 0, 1+len(args)) + allArgs = append(allArgs, f) + allArgs = append(allArgs, args...) + return e.callObject(meth, allArgs, kwargs) + } + raiseTypeError("'%s' object is not callable", f.Class.Name) + case *PyType: + // Built-in type constructors: int, str, float, etc. are registered as PyBuiltin + // This path should rarely be hit + raiseTypeError("type '%s' object is not callable this way", f.Name) + } + raiseTypeError("'%s' object is not callable", fn.pyType().Name) + return nil +} + +func (e *Evaluator) callClass(cls *PyClass, args []Object, kwargs map[string]Object) Object { + // If the class is an exception class (subclass of BaseException), + // instantiate it as a *PyException for proper raise/except semantics. + if classIsException(cls) { + exc := &PyException{ExcClass: cls, Args: args, Dict: make(map[string]Object)} + // Run custom __init__ if present (for user-defined exception classes). + if initFn, ok := cls.lookupInMRO("__init__"); ok { + // Wrap exc in an adapter so __init__ can set attributes on it. 
+ allArgs := make([]Object, 0, 1+len(args)) + allArgs = append(allArgs, exc) + allArgs = append(allArgs, args...) + e.callObject(initFn, allArgs, kwargs) + } + return exc + } + + inst := &PyInstance{Class: cls, Dict: make(map[string]Object)} + + // Look up __init__ in MRO + if initFn, ok := cls.lookupInMRO("__init__"); ok { + allArgs := make([]Object, 0, 1+len(args)) + allArgs = append(allArgs, inst) + allArgs = append(allArgs, args...) + e.callObject(initFn, allArgs, kwargs) + } + return inst +} + +// classIsException returns true if cls is a built-in exception class (one of the +// ExcBaseException singletons or a subclass thereof in the singleton hierarchy). +func classIsException(cls *PyClass) bool { + for _, c := range cls.MRO { + if c == ExcBaseException { + return true + } + } + return false +} + +// lookupInMRO looks for a method in the class MRO. +func (cls *PyClass) lookupInMRO(name string) (Object, bool) { + for _, c := range cls.MRO { + if v, ok := c.Dict[name]; ok { + return v, true + } + } + return nil, false +} + +func (e *Evaluator) callFunction(fn *PyFunction, args []Object, kwargs map[string]Object) Object { + if e.depth >= maxCallDepth { + panic(exceptionSignal{exc: newExceptionf(ExcRecursionError, "maximum recursion depth exceeded")}) + } + + // Build function scope as child of closure (not current scope — lexical scoping) + funcScope := newFunctionScope(fn.Closure, fn.Globals, fn.Name) + + // Match args to parameters + e.bindArgs(fn, funcScope, args, kwargs) + + if fn.IsGen { + return e.makeGenerator(fn, funcScope) + } + + // Execute function body + child := &Evaluator{ + ctx: e.ctx, + scope: funcScope, + globals: fn.Globals, + opts: e.opts, + modules: e.modules, + depth: e.depth + 1, + } + + var retVal Object = pyNone + func() { + defer func() { + r := recover() + if r == nil { + return + } + if sig, ok := r.(controlSignal); ok && sig.kind == ctrlReturn { + retVal = sig.value + return + } + panic(r) + }() + child.exec(fn.Body) + }() + return 
retVal +} + +func (e *Evaluator) bindArgs(fn *PyFunction, scope *Scope, args []Object, kwargs map[string]Object) { + params := fn.Args + nRequired := len(params.Args) - len(fn.Defaults) + + posIdx := 0 + for i, param := range params.Args { + if posIdx < len(args) { + scope.set(param, args[posIdx]) + posIdx++ + } else if kv, ok := kwargs[param]; ok { + scope.set(param, kv) + delete(kwargs, param) + } else if i >= nRequired { + // Has default + defIdx := i - nRequired + scope.set(param, fn.Defaults[defIdx]) + } else { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, "%s() missing required argument: '%s'", fn.Name, param)}) + } + } + + // Handle *args + if params.Vararg != "" { + varargs := make([]Object, 0) + for posIdx < len(args) { + varargs = append(varargs, args[posIdx]) + posIdx++ + } + scope.set(params.Vararg, pyTuple(varargs)) + } else if posIdx < len(args) { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, "%s() takes %d positional argument(s) but %d were given", fn.Name, len(params.Args), len(args))}) + } + + // Keyword-only args + for i, kw := range params.KwOnly { + if kv, ok := kwargs[kw]; ok { + scope.set(kw, kv) + delete(kwargs, kw) + } else if fn.KwDefaults != nil { + if def, ok2 := fn.KwDefaults[kw]; ok2 { + scope.set(kw, def) + } else if i < len(fn.Args.KwDefaults) && fn.Args.KwDefaults[i] == nil { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, "%s() missing keyword-only argument: '%s'", fn.Name, kw)}) + } + } + } + + // Handle **kwargs + if params.Kwarg != "" { + kwargsDict := pyDict() + for k, v := range kwargs { + kwargsDict.set(pyStr(k), v) + } + scope.set(params.Kwarg, kwargsDict) + } else if len(kwargs) > 0 { + // Report first unexpected kwarg + for k := range kwargs { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, "%s() got an unexpected keyword argument '%s'", fn.Name, k)}) + } + } +} + +func (e *Evaluator) makeGenerator(fn *PyFunction, scope *Scope) *PyGenerator { + g := &PyGenerator{ + name: fn.Name, + 
sendCh: make(chan Object, 0), + yieldCh: make(chan Object, 0), + excCh: make(chan *PyException, 1), + ctx: e.ctx, + } + + childEval := &Evaluator{ + ctx: e.ctx, + scope: scope, + globals: fn.Globals, + opts: e.opts, + modules: e.modules, + depth: e.depth + 1, + genState: &genChannels{ + sendCh: g.sendCh, + yieldCh: g.yieldCh, + ctx: e.ctx, + }, + } + + go func() { + // Register this goroutine's callObject so that map/filter/sorted with + // user-defined key functions work correctly inside generator bodies. + if gid, ok := goroutineID(); ok { + goroutineCallFns.Store(gid, func(fn Object, args []Object, kwargs map[string]Object) Object { + return childEval.callObject(fn, args, kwargs) + }) + defer goroutineCallFns.Delete(gid) + } + defer close(g.yieldCh) + defer func() { + r := recover() + if r == nil { + return + } + if sig, ok := r.(exceptionSignal); ok { + if exceptionMatchesClass(sig.exc, ExcStopIteration) { + return + } + if exceptionMatchesClass(sig.exc, ExcGeneratorExit) { + return + } + // Non-StopIteration Python exception: propagate to the caller of + // next(g) via excCh so it is not silently swallowed. + select { + case g.excCh <- sig.exc: + default: + } + return + } + if _, ok := r.(controlSignal); ok { + // return from generator is normal completion. + return + } + // Real Go panic (nil pointer, index OOB, etc.) — re-panic so it is + // not silently swallowed. + panic(r) + }() + childEval.exec(fn.Body) + }() + + return g +} + +// ---- Iteration helpers ---- + +// iterateObj materializes an iterable into a slice. 
+func (e *Evaluator) iterateObj(obj Object) []Object { + switch v := obj.(type) { + case *PyInstance: + if fn, ok := v.lookupMethod("__iter__"); ok { + iterObj := e.callObject(fn, []Object{v}, nil) + return e.drainIter(iterObj) + } + if fn, ok := v.lookupMethod("__getitem__"); ok { + // Legacy iteration protocol + var items []Object + for i := 0; ; i++ { + func() { + defer func() { + r := recover() + if r != nil { + if sig, ok2 := r.(exceptionSignal); ok2 { + if exceptionMatchesClass(sig.exc, ExcIndexError) || exceptionMatchesClass(sig.exc, ExcStopIteration) { + items = nil // sentinel to stop + return + } + } + panic(r) + } + }() + val := e.callObject(fn, []Object{v, pyInt(int64(i))}, nil) + items = append(items, val) + }() + if items == nil { + break + } + } + if items == nil { + return []Object{} + } + return items + } + raiseTypeError("'%s' object is not iterable", v.Class.Name) + case *PyListIter: + var result []Object + for { + item, ok := v.next() + if !ok { + break + } + result = append(result, item) + } + return result + case *PyDictKeyIter: + var result []Object + for { + item, ok := v.next() + if !ok { + break + } + result = append(result, item) + } + return result + case *rangeIter: + var result []Object + for { + item, ok := v.next() + if !ok { + break + } + result = append(result, item) + } + return result + } + return collectIterable(obj) +} + +func (e *Evaluator) drainIter(iterObj Object) []Object { + var result []Object + for { + val, ok := e.nextFromIter(iterObj) + if !ok { + break + } + result = append(result, val) + if len(result) > maxGeneratorItems { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "iterable produced too many items (limit %d)", maxGeneratorItems)}) + } + } + return result +} + +// nextFromIter advances an iterator by one step. 
+func (e *Evaluator) nextFromIter(obj Object) (Object, bool) { + switch v := obj.(type) { + case *rangeIter: + return v.next() + case *PyMapIter: + return v.next() + case *PyFilterIter: + return v.next() + case *PyZipIter: + return v.next() + case *PyEnumerateIter: + return v.next() + case *PyReversedIter: + return v.next() + case *PyListIter: + return v.next() + case *PyDictKeyIter: + return v.next() + case *PyGenerator: + return e.nextFromGenerator(v) + case *PyInstance: + if fn, ok := v.lookupMethod("__next__"); ok { + var val Object + done := false + func() { + defer func() { + r := recover() + if r == nil { + return + } + if sig, ok2 := r.(exceptionSignal); ok2 { + if exceptionMatchesClass(sig.exc, ExcStopIteration) { + done = true + return + } + } + panic(r) + }() + val = e.callObject(fn, []Object{v}, nil) + }() + if done { + return nil, false + } + return val, true + } + if fn, ok := v.lookupMethod("__iter__"); ok { + iterObj := e.callObject(fn, []Object{v}, nil) + return e.nextFromIter(iterObj) + } + } + return nextFromIterable(obj) +} + +func (e *Evaluator) nextFromGenerator(g *PyGenerator) (Object, bool) { + if g.done { + return nil, false + } + // If the generator is waiting for a sendCh kick (it has yielded and is + // blocked on sendCh), send None to advance it before receiving the next value. + if g.awaitingSend { + select { + case g.sendCh <- pyNone: + g.awaitingSend = false + case <-e.ctx.Done(): + g.done = true + return nil, false + } + } + select { + case val, ok := <-g.yieldCh: + if !ok { + g.done = true + // Check if the generator exited with a non-StopIteration exception. + // The generator goroutine sends the exception on excCh before closing + // yieldCh (via defer), so by the time we see the channel close the + // exception (if any) is already in excCh. 
+ if g.excCh != nil { + select { + case exc := <-g.excCh: + panic(exceptionSignal{exc: exc}) + default: + } + } + return nil, false + } + g.awaitingSend = true + return val, true + case <-e.ctx.Done(): + g.done = true + return nil, false + } +} + +// callMethod calls a named method on an object, returning the result. +func (e *Evaluator) callMethod(obj Object, name string, args []Object, kwargs map[string]Object) Object { + val, ok := getAttr(obj, name) + if !ok { + raiseAttributeError(obj.pyType().Name, name) + } + // If it's a bound method or function in a class, prepend self + switch fn := val.(type) { + case *PyFunction: + allArgs := make([]Object, 0, 1+len(args)) + allArgs = append(allArgs, obj) + allArgs = append(allArgs, args...) + return e.callFunction(fn, allArgs, kwargs) + default: + return e.callObject(fn, args, kwargs) + } +} + +// ---- Assignment helpers ---- + +func (e *Evaluator) assign(target Expr, value Object) { + switch t := target.(type) { + case *NameExpr: + e.scope.set(t.Id, value) + case *AttributeExpr: + obj := e.eval(t.Value) + setAttr(obj, t.Attr, value) + case *SubscriptExpr: + obj := e.eval(t.Value) + if sl, ok := t.Slice.(*SliceExpr); ok { + key := e.buildSliceObj(sl) + e.setItem(obj, key, value) + } else { + key := e.eval(t.Slice) + e.setItem(obj, key, value) + } + case *Starred: + raiseTypeError("starred assignment target must be in a list or tuple") + case *TupleExpr: + e.unpackAssign(t.Elts, value) + case *ListExpr: + e.unpackAssign(t.Elts, value) + } +} + +func (e *Evaluator) unpackAssign(elts []Expr, value Object) { + items := e.iterateObj(value) + + // Find starred position + starIdx := -1 + for i, elt := range elts { + if _, ok := elt.(*Starred); ok { + starIdx = i + break + } + } + + if starIdx == -1 { + if len(items) != len(elts) { + if len(items) < len(elts) { + panic(exceptionSignal{exc: newExceptionf(ExcValueError, "not enough values to unpack (expected %d, got %d)", len(elts), len(items))}) + } + panic(exceptionSignal{exc: 
newExceptionf(ExcValueError, "too many values to unpack (expected %d)", len(elts))}) + } + for i, elt := range elts { + e.assign(elt, items[i]) + } + } else { + before := elts[:starIdx] + after := elts[starIdx+1:] + minLen := len(before) + len(after) + if len(items) < minLen { + panic(exceptionSignal{exc: newExceptionf(ExcValueError, "not enough values to unpack")}) + } + for i, elt := range before { + e.assign(elt, items[i]) + } + starItems := items[len(before) : len(items)-len(after)] + e.assign(elts[starIdx].(*Starred).Value, pyList(starItems)) + for i, elt := range after { + e.assign(elt, items[len(items)-len(after)+i]) + } + } +} + +// ---- Utility helpers ---- + +// normBool converts *PyBool to *PyInt for arithmetic. +func normBool(obj Object) Object { + if b, ok := obj.(*PyBool); ok { + if b.v { + return pyInt(1) + } + return pyInt(0) + } + return obj +} + +// toFloatVal converts any numeric to float64. +func toFloatVal(obj Object) float64 { + switch v := obj.(type) { + case *PyFloat: + return v.v + case *PyInt: + if n, ok := v.int64(); ok { + return float64(n) + } + f, _ := new(big.Float).SetInt(v.big).Float64() + return f + case *PyBool: + if v.v { + return 1 + } + return 0 + } + raiseTypeError("must be real number, not '%s'", obj.pyType().Name) + return 0 +} + +// toOptInt tries to extract an int64 from an int-like object. +func toOptInt(obj Object) (int64, bool) { + switch v := obj.(type) { + case *PyInt: + if n, ok := v.int64(); ok { + return n, true + } + case *PyBool: + if v.v { + return 1, true + } + return 0, true + } + return 0, false +} + +// printTraceback prints a Python traceback to w. 
+func printTraceback(w io.Writer, exc *PyException) { + fmt.Fprintln(w, "Traceback (most recent call last):") + for _, frame := range exc.Traceback { + fmt.Fprintf(w, " File %q, line %d, in %s\n", frame.File, frame.Line, frame.Name) + } + msg := exc.pyStr() + if msg != "" { + fmt.Fprintf(w, "%s: %s\n", exc.ExcClass.Name, msg) + } else { + fmt.Fprintf(w, "%s\n", exc.ExcClass.Name) + } + + if exc.Cause != nil { + fmt.Fprintf(w, "\nThe above exception was the direct cause of the following exception:\n\n") + printTraceback(w, exc.Cause) + } else if exc.Context != nil { + fmt.Fprintf(w, "\nDuring handling of the above exception, another exception occurred:\n\n") + printTraceback(w, exc.Context) + } +} diff --git a/builtins/python/lexer.go b/builtins/python/lexer.go new file mode 100644 index 00000000..d7297315 --- /dev/null +++ b/builtins/python/lexer.go @@ -0,0 +1,739 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package python + +import ( + "fmt" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// TokenKind identifies the type of a lexical token. +type TokenKind int + +const ( + TokEOF TokenKind = iota + TokNewline // logical newline + TokIndent // indent increase + TokDedent // indent decrease + TokName // identifier or keyword + TokInt // integer literal + TokFloat // float literal + TokString // string literal + TokBytes // bytes literal + TokOp // operator or punctuation + TokComment // comment (callers typically ignore) +) + +// Token is a single lexical token. +type Token struct { + Kind TokenKind + Value string + Pos Pos +} + +// pythonKeywords is the set of Python keywords (emitted as TokName). 
+var pythonKeywords = map[string]bool{
+	"if": true, "elif": true, "else": true, "for": true, "while": true,
+	"def": true, "class": true, "return": true, "import": true, "from": true,
+	"as": true, "with": true, "try": true, "except": true, "finally": true,
+	"raise": true, "pass": true, "break": true, "continue": true,
+	"and": true, "or": true, "not": true, "in": true, "is": true,
+	"global": true, "nonlocal": true, "del": true, "assert": true,
+	"lambda": true, "yield": true, "True": true, "False": true, "None": true,
+	"async": true, "await": true,
+}
+
+// Lexer tokenizes Python source code.
+type Lexer struct {
+	src   []rune
+	pos   int // current position in src
+	line  int // 1-based
+	col   int // 1-based
+	paren int // nesting depth of ( [ {
+	// pending holds tokens that have been produced but not yet consumed:
+	// lookahead filled by PeekN, plus follow-up tokens queued by readToken.
+	pending []Token
+	// indent stack: first entry is always ""
+	indentStack []string
+	// atLineStart tracks whether we need to process indentation on the next token
+	atLineStart bool
+	// lastWasNewline is set when a NEWLINE token is emitted. readToken consults
+	// it at end of input to decide whether a NEWLINE must be emitted before the
+	// closing DEDENTs.
+	lastWasNewline bool
+}
+
+// NewLexer creates a new Lexer for the given source string, positioned at
+// line 1, column 1 with an indentation stack holding only the empty indent.
+func NewLexer(src string) *Lexer {
+	return &Lexer{
+		src:         []rune(src),
+		line:        1,
+		col:         1,
+		indentStack: []string{""},
+		atLineStart: true,
+	}
+}
+
+// Next consumes and returns the next token.
+func (l *Lexer) Next() Token {
+	return l.next()
+}
+
+// Peek returns the next token without consuming it.
+func (l *Lexer) Peek() Token {
+	return l.PeekN(0)
+}
+
+// PeekN peeks n tokens ahead (0 = next token) without consuming anything.
+// It panics if n is negative.
+func (l *Lexer) PeekN(n int) Token {
+	for len(l.pending) <= n {
+		// Read straight from the tokenizer: going through next() would pop the
+		// very buffer being filled. readToken may queue follow-up tokens on
+		// l.pending, so detach the lookahead first and splice everything back
+		// in stream order: existing lookahead, the new token, its follow-ups.
+		lookahead := l.pending
+		l.pending = nil
+		tok := l.readToken()
+		lookahead = append(lookahead, tok)
+		l.pending = append(lookahead, l.pending...)
+	}
+	return l.pending[n]
+}
+
+// next returns the oldest buffered token, or reads a fresh one when the
+// buffer is empty.
+func (l *Lexer) next() Token {
+	if len(l.pending) > 0 {
+		t := l.pending[0]
+		l.pending = l.pending[1:]
+		return t
+	}
+	return l.readToken()
+}
+
+// readToken is the core tokenizer.
+// It skips whitespace, comments and line continuations, turns indentation
+// changes into INDENT/DEDENT tokens, and dispatches to the literal, name and
+// operator readers. At end of input it closes open indentation levels and
+// then returns TokEOF on every further call.
+func (l *Lexer) readToken() Token {
+	for {
+		// At start of a new line (when not inside brackets), handle indentation.
+		if l.atLineStart && l.paren == 0 {
+			l.atLineStart = false
+			if toks := l.handleIndent(); len(toks) > 0 {
+				// Queue all but first.
+				if len(toks) > 1 {
+					l.pending = append(toks[1:], l.pending...)
+				}
+				return toks[0]
+			}
+		}
+
+		if l.pos >= len(l.src) {
+			return l.readEOF()
+		}
+
+		ch := l.src[l.pos]
+
+		// Line continuation: a backslash directly before the line break
+		// (either "\n" or "\r\n").
+		if ch == '\\' {
+			if n := l.lineBreakLen(l.pos + 1); n > 0 {
+				l.pos += 1 + n
+				l.line++
+				l.col = 1
+				continue
+			}
+		}
+
+		// Skip whitespace (not newlines, unless inside parens).
+		if ch == ' ' || ch == '\t' || ch == '\r' {
+			l.pos++
+			if ch == '\t' {
+				// tab advances to next multiple of 8
+				l.col = ((l.col-1)/8+1)*8 + 1
+			} else {
+				l.col++
+			}
+			continue
+		}
+
+		// Comment: skipped up to (not including) the newline, never emitted.
+		if ch == '#' {
+			for l.pos < len(l.src) && l.src[l.pos] != '\n' {
+				l.pos++
+				l.col++
+			}
+			continue
+		}
+
+		// Newline.
+		if ch == '\n' {
+			pos := l.curPos()
+			l.pos++
+			l.line++
+			l.col = 1
+			if l.paren > 0 {
+				// Inside brackets: implicit line continuation, skip newline.
+				continue
+			}
+			l.atLineStart = true
+			l.lastWasNewline = true
+			return Token{Kind: TokNewline, Value: "\n", Pos: pos}
+		}
+
+		// Everything below is an ordinary token, so the current logical line
+		// is not terminated yet; readEOF relies on this to emit the final
+		// NEWLINE before closing DEDENTs.
+		l.lastWasNewline = false
+
+		switch {
+		case isStringStart(l.src, l.pos):
+			return l.readStringOrBytes()
+		case unicode.IsDigit(ch) || (ch == '.' && l.pos+1 < len(l.src) && unicode.IsDigit(l.src[l.pos+1])):
+			return l.readNumber()
+		case ch == '_' || unicode.IsLetter(ch):
+			return l.readName()
+		default:
+			// Operators and punctuation.
+			return l.readOp()
+		}
+	}
+}
+
+// readEOF produces the tokens that close the stream: for every open
+// indentation level one DEDENT, preceded by a single NEWLINE if the last
+// logical line was not terminated, and finally TokEOF. It returns one token
+// per call and keeps its state in indentStack/lastWasNewline, so the order is
+// the same whether callers use Next or Peek.
+func (l *Lexer) readEOF() Token {
+	pos := l.curPos()
+	if len(l.indentStack) <= 1 {
+		return Token{Kind: TokEOF, Pos: pos}
+	}
+	if !l.lastWasNewline {
+		l.lastWasNewline = true
+		return Token{Kind: TokNewline, Pos: pos}
+	}
+	l.indentStack = l.indentStack[:len(l.indentStack)-1]
+	return Token{Kind: TokDedent, Pos: pos}
+}
+
+// lineBreakLen returns the length in runes of the line break starting at
+// index i of src ("\n" is 1, "\r\n" is 2), or 0 if there is none.
+func (l *Lexer) lineBreakLen(i int) int {
+	switch {
+	case i < len(l.src) && l.src[i] == '\n':
+		return 1
+	case i+1 < len(l.src) && l.src[i] == '\r' && l.src[i+1] == '\n':
+		return 2
+	}
+	return 0
+}
+
+// curPos returns the current line/column position.
+func (l *Lexer) curPos() Pos {
+	return Pos{Line: l.line, Col: l.col}
+}
+
+// handleIndent processes indentation at the start of a line.
+// Returns a (possibly empty) list of tokens to emit.
+func (l *Lexer) handleIndent() []Token {
+	indentStr := l.measureIndent()
+	// measureIndent only returns spaces and tabs, so bytes == runes here.
+	width := len(indentStr)
+
+	// Blank lines, comment-only lines and whitespace at end of input never
+	// change the indentation level: consume the whitespace and stop.
+	next := l.pos + width
+	blank := next >= len(l.src)
+	if !blank {
+		switch l.src[next] {
+		case '\n', '#', '\r':
+			blank = true
+		}
+	}
+	l.advanceBy(width)
+	if blank {
+		return nil
+	}
+
+	top := l.indentStack[len(l.indentStack)-1]
+	pos := l.curPos()
+
+	if indentStr == top {
+		// Same level — no token.
+		return nil
+	}
+
+	if strings.HasPrefix(indentStr, top) {
+		// Deeper — emit INDENT.
+		l.indentStack = append(l.indentStack, indentStr)
+		l.lastWasNewline = false
+		return []Token{{Kind: TokIndent, Pos: pos}}
+	}
+
+	// Shallower — pop levels until the matching one, one DEDENT per level.
+	var toks []Token
+	for len(l.indentStack) > 1 {
+		l.indentStack = l.indentStack[:len(l.indentStack)-1]
+		toks = append(toks, Token{Kind: TokDedent, Pos: pos})
+		if l.indentStack[len(l.indentStack)-1] == indentStr {
+			break
+		}
+	}
+	// NOTE: if no enclosing level equals indentStr the indentation is
+	// inconsistent. There is no error token, so the DEDENTs collected so far
+	// are emitted as they are.
+	l.lastWasNewline = false
+	return toks
+}
+
+// measureIndent returns the leading whitespace string of the current line
+// without advancing the lexer position.
+func (l *Lexer) measureIndent() string {
+	end := l.pos
+	for end < len(l.src) && (l.src[end] == ' ' || l.src[end] == '\t') {
+		end++
+	}
+	return string(l.src[l.pos:end])
+}
+
+// advanceBy moves the lexer position forward by n runes, updating col.
+// A tab moves col to the next multiple-of-8 tab stop.
+func (l *Lexer) advanceBy(n int) {
+	for i := 0; i < n && l.pos < len(l.src); i++ {
+		ch := l.src[l.pos]
+		l.pos++
+		if ch == '\t' {
+			l.col = ((l.col-1)/8+1)*8 + 1
+		} else {
+			l.col++
+		}
+	}
+}
+
+// isStringStart returns true if the position starts a string literal: a
+// quote, optionally preceded by a one-letter prefix (r, b, f, u) or one of
+// the two-letter prefixes rb, br, rf, fr, in any letter case.
+func isStringStart(src []rune, pos int) bool {
+	isQuote := func(r rune) bool { return r == '"' || r == '\'' }
+	// lower folds ASCII upper-case letters only; other runes are unchanged.
+	lower := func(r rune) rune {
+		if r >= 'A' && r <= 'Z' {
+			return r + ('a' - 'A')
+		}
+		return r
+	}
+
+	if pos >= len(src) {
+		return false
+	}
+	if isQuote(src[pos]) {
+		return true
+	}
+	if pos+1 >= len(src) {
+		return false
+	}
+
+	first := lower(src[pos])
+	if first != 'r' && first != 'b' && first != 'f' && first != 'u' {
+		return false
+	}
+	if isQuote(src[pos+1]) {
+		return true
+	}
+
+	// Two-character prefix: only the raw combinations are valid.
+	if pos+2 >= len(src) || !isQuote(src[pos+2]) {
+		return false
+	}
+	second := lower(src[pos+1])
+	return (first == 'r' && (second == 'b' || second == 'f')) ||
+		((first == 'b' || first == 'f') && second == 'r')
+}
+
+// readStringOrBytes reads a string or bytes literal, including its prefix,
+// and returns the decoded contents as the token value. An unterminated
+// literal ends at the line break (single-quoted) or at end of input
+// (triple-quoted) and is returned without error.
+func (l *Lexer) readStringOrBytes() Token {
+	pos := l.curPos()
+
+	// Collect prefix.
+	isRaw, isBytes := false, false
+prefix:
+	for l.pos < len(l.src) {
+		switch l.src[l.pos] {
+		case 'r', 'R':
+			isRaw = true
+		case 'b', 'B':
+			isBytes = true
+		case 'f', 'F', 'u', 'U':
+			// These prefixes do not change how the literal is decoded here.
+		default:
+			break prefix
+		}
+		l.pos++
+		l.col++
+	}
+
+	if l.pos >= len(l.src) {
+		return Token{Kind: TokString, Value: "", Pos: pos}
+	}
+
+	quote := l.src[l.pos]
+	l.pos++
+	l.col++
+
+	// Check for triple quote.
+	triple := false
+	if l.pos+1 < len(l.src) && l.src[l.pos] == quote && l.src[l.pos+1] == quote {
+		triple = true
+		l.pos += 2
+		l.col += 2
+	}
+
+	var buf strings.Builder
+body:
+	for l.pos < len(l.src) {
+		ch := l.src[l.pos]
+
+		switch {
+		case triple:
+			if ch == quote && l.pos+2 < len(l.src) && l.src[l.pos+1] == quote && l.src[l.pos+2] == quote {
+				l.pos += 3
+				l.col += 3
+				break body
+			}
+		case ch == quote:
+			l.pos++
+			l.col++
+			break body
+		case ch == '\n':
+			// Unterminated string.
+			break body
+		}
+
+		if ch != '\\' {
+			l.copyRune(&buf)
+			continue
+		}
+
+		if isRaw {
+			// Raw literal: the backslash and the character after it are both
+			// kept verbatim, and that character cannot end the literal.
+			l.copyRune(&buf)
+			if l.pos < len(l.src) {
+				l.copyRune(&buf)
+			}
+			continue
+		}
+
+		// Skip the backslash and decode the escape that follows it.
+		l.pos++
+		l.col++
+		if l.pos >= len(l.src) {
+			break
+		}
+		l.readEscape(&buf, isBytes)
+	}
+
+	kind := TokString
+	if isBytes {
+		kind = TokBytes
+	}
+	return Token{Kind: kind, Value: buf.String(), Pos: pos}
+}
+
+// copyRune appends the rune at the current position to buf unchanged and
+// advances past it, keeping line/col in step.
+func (l *Lexer) copyRune(buf *strings.Builder) {
+	ch := l.src[l.pos]
+	if ch == '\n' {
+		l.line++
+		l.col = 1
+	} else {
+		l.col++
+	}
+	buf.WriteRune(ch)
+	l.pos++
+}
+
+// readEscape decodes one backslash escape; l.pos is on the character after
+// the backslash. Unknown escapes (including \N{...}) are kept as written.
+// \u and \U are only escapes in str literals, not in bytes literals.
+func (l *Lexer) readEscape(buf *strings.Builder, isBytes bool) {
+	esc := l.src[l.pos]
+	l.pos++
+	l.col++
+
+	switch {
+	case esc == 'n':
+		buf.WriteByte('\n')
+	case esc == 't':
+		buf.WriteByte('\t')
+	case esc == 'r':
+		buf.WriteByte('\r')
+	case esc == '\\':
+		buf.WriteByte('\\')
+	case esc == '\'':
+		buf.WriteByte('\'')
+	case esc == '"':
+		buf.WriteByte('"')
+	case esc == 'a':
+		buf.WriteByte('\a')
+	case esc == 'b':
+		buf.WriteByte('\b')
+	case esc == 'f':
+		buf.WriteByte('\f')
+	case esc == 'v':
+		buf.WriteByte('\v')
+	case esc == '\n':
+		// line continuation inside string
+		l.line++
+		l.col = 1
+	case esc >= '0' && esc <= '7':
+		// \ooo — one to three octal digits.
+		v := uint64(esc - '0')
+		for i := 0; i < 2 && l.pos < len(l.src) && l.src[l.pos] >= '0' && l.src[l.pos] <= '7'; i++ {
+			v = v*8 + uint64(l.src[l.pos]-'0')
+			l.pos++
+			l.col++
+		}
+		writeCodeUnit(buf, v, isBytes)
+	case esc == 'x':
+		// \xNN
+		l.readHexEscape(buf, esc, 2, isBytes)
+	case esc == 'u' && !isBytes:
+		// \uNNNN
+		l.readHexEscape(buf, esc, 4, isBytes)
+	case esc == 'U' && !isBytes:
+		// \UNNNNNNNN
+		l.readHexEscape(buf, esc, 8, isBytes)
+	default:
+		buf.WriteByte('\\')
+		buf.WriteRune(esc)
+	}
+}
+
+// readHexEscape decodes exactly n hex digits at the current position as the
+// value of a \x, \u or \U escape. If the digits are missing, malformed or
+// above unicode.MaxRune, nothing is consumed and the escape is kept as
+// written (backslash plus esc).
+func (l *Lexer) readHexEscape(buf *strings.Builder, esc rune, n int, isBytes bool) {
+	if end := l.pos + n; end <= len(l.src) {
+		v, err := strconv.ParseUint(string(l.src[l.pos:end]), 16, 32)
+		if err == nil && v <= unicode.MaxRune {
+			writeCodeUnit(buf, v, isBytes)
+			l.pos = end
+			l.col += n
+			return
+		}
+	}
+	buf.WriteByte('\\')
+	buf.WriteRune(esc)
+}
+
+// writeCodeUnit appends an escaped value: a single byte in a bytes literal
+// (truncated to 8 bits), the code point encoded as UTF-8 in a str literal.
+func writeCodeUnit(buf *strings.Builder, v uint64, isBytes bool) {
+	if isBytes {
+		buf.WriteByte(byte(v))
+		return
+	}
+	buf.WriteRune(rune(v))
+}
+
+// readNumber reads an integer or float literal. The token value is the
+// literal's source text; underscores are kept. An imaginary suffix (j/J) is
+// accepted and reported as TokFloat.
+func (l *Lexer) readNumber() Token {
+	pos := l.curPos()
+	start := l.pos
+
+	// Check for special bases.
+	if l.src[l.pos] == '0' && l.pos+1 < len(l.src) {
+		switch l.src[l.pos+1] {
+		case 'x', 'X':
+			return l.readHex(pos, start)
+		case 'o', 'O':
+			return l.readOctal(pos, start)
+		case 'b', 'B':
+			return l.readBinary(pos, start)
+		}
+	}
+
+	// Decimal integer or float.
+	isFloat := false
+	seenExp := false
+scan:
+	for l.pos < len(l.src) {
+		ch := l.src[l.pos]
+		switch {
+		case unicode.IsDigit(ch) || ch == '_':
+			l.pos++
+			l.col++
+		case ch == '.' && !isFloat:
+			// Check that the next char is not another '.' (e.g. range operator in some langs)
+			if l.pos+1 < len(l.src) && l.src[l.pos+1] == '.' {
+				break scan
+			}
+			isFloat = true
+			l.pos++
+			l.col++
+		case (ch == 'e' || ch == 'E') && !seenExp && l.exponentFollows():
+			// One exponent only, and only when digits actually follow;
+			// otherwise the letter starts the next token.
+			isFloat = true
+			seenExp = true
+			l.pos++
+			l.col++
+			if l.src[l.pos] == '+' || l.src[l.pos] == '-' {
+				l.pos++
+				l.col++
+			}
+		case ch == 'j' || ch == 'J':
+			// complex literal — treat as float for now
+			l.pos++
+			l.col++
+			isFloat = true
+			break scan
+		default:
+			break scan
+		}
+	}
+
+	// A literal that starts with '.' is covered too: start precedes the dot.
+	val := string(l.src[start:l.pos])
+	if isFloat {
+		return Token{Kind: TokFloat, Value: val, Pos: pos}
+	}
+	return Token{Kind: TokInt, Value: val, Pos: pos}
+}
+
+// exponentFollows reports whether the e/E at the current position is
+// followed by a digit, optionally after a sign.
+func (l *Lexer) exponentFollows() bool {
+	i := l.pos + 1
+	if i < len(l.src) && (l.src[i] == '+' || l.src[i] == '-') {
+		i++
+	}
+	return i < len(l.src) && unicode.IsDigit(l.src[i])
+}
+
+// readHex reads a 0x/0X integer literal starting at start.
+func (l *Lexer) readHex(pos Pos, start int) Token {
+	return l.readRadixInt(pos, start, isHexDigit)
+}
+
+// readOctal reads a 0o/0O integer literal starting at start.
+func (l *Lexer) readOctal(pos Pos, start int) Token {
+	return l.readRadixInt(pos, start, func(ch rune) bool { return ch >= '0' && ch <= '7' })
+}
+
+// readBinary reads a 0b/0B integer literal starting at start.
+func (l *Lexer) readBinary(pos Pos, start int) Token {
+	return l.readRadixInt(pos, start, func(ch rune) bool { return ch == '0' || ch == '1' })
+}
+
+// readRadixInt consumes the two-character base prefix and then every digit
+// accepted by isDigit (underscores included). The token value is the source
+// text from start, prefix included.
+func (l *Lexer) readRadixInt(pos Pos, start int, isDigit func(rune) bool) Token {
+	l.pos += 2
+	l.col += 2
+	for l.pos < len(l.src) && (isDigit(l.src[l.pos]) || l.src[l.pos] == '_') {
+		l.pos++
+		l.col++
+	}
+	return Token{Kind: TokInt, Value: string(l.src[start:l.pos]), Pos: pos}
+}
+
+// isHexDigit reports whether ch is an ASCII hexadecimal digit.
+func isHexDigit(ch rune) bool {
+	return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')
+}
+
+// readName reads an identifier or keyword. Keywords are not distinguished
+// here: both are returned as TokName.
+func (l *Lexer) readName() Token {
+	pos := l.curPos()
+	start := l.pos
+	for l.pos < len(l.src) {
+		ch := l.src[l.pos]
+		if ch != '_' && !unicode.IsLetter(ch) && !unicode.IsDigit(ch) {
+			break
+		}
+		l.pos++
+		l.col++
+	}
+	return Token{Kind: TokName, Value: string(l.src[start:l.pos]), Pos: pos}
+}
+
+// readOp reads an operator or punctuation token, preferring the longest
+// operator that matches. Any character that is not part of a known
+// multi-character operator becomes a one-character TokOp.
+func (l *Lexer) readOp() Token {
+	pos := l.curPos()
+
+	// Track paren depth for indent/dedent logic.
+	switch l.src[l.pos] {
+	case '(', '[', '{':
+		l.paren++
+		return l.takeOp(1, pos)
+	case ')', ']', '}':
+		// An unmatched closer leaves the depth at zero.
+		if l.paren > 0 {
+			l.paren--
+		}
+		return l.takeOp(1, pos)
+	}
+
+	// Try multi-character operators first.
+	if l.pos+2 < len(l.src) {
+		switch string(l.src[l.pos : l.pos+3]) {
+		case "<<=", ">>=", "**=", "//=":
+			return l.takeOp(3, pos)
+		}
+	}
+
+	if l.pos+1 < len(l.src) {
+		switch string(l.src[l.pos : l.pos+2]) {
+		case "**", "//", "<<", ">>", "<=", ">=", "!=", "==",
+			"+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=",
+			"->", ":=":
+			return l.takeOp(2, pos)
+		}
+	}
+
+	// Single character operators.
+	return l.takeOp(1, pos)
+}
+
+// takeOp consumes the next n runes and returns them as a TokOp at pos.
+func (l *Lexer) takeOp(n int, pos Pos) Token {
+	val := string(l.src[l.pos : l.pos+n])
+	l.pos += n
+	l.col += n
+	return Token{Kind: TokOp, Value: val, Pos: pos}
+}
+
+// tokenKindString returns a human-readable name for a token kind.
+// Kinds outside the declared constants are rendered as "Token(N)".
+func tokenKindString(k TokenKind) string {
+	if k >= 0 && int(k) < len(tokenKindNames) {
+		return tokenKindNames[k]
+	}
+	return fmt.Sprintf("Token(%d)", int(k))
+}
+
+// tokenKindNames maps each TokenKind constant to its display name.
+var tokenKindNames = [...]string{
+	TokEOF:     "EOF",
+	TokNewline: "NEWLINE",
+	TokIndent:  "INDENT",
+	TokDedent:  "DEDENT",
+	TokName:    "NAME",
+	TokInt:     "INT",
+	TokFloat:   "FLOAT",
+	TokString:  "STRING",
+	TokBytes:   "BYTES",
+	TokOp:      "OP",
+	TokComment: "COMMENT",
+}
+
+// Blank references that keep the utf8, unicode and strings imports in use
+// (utf8.RuneLen is kept for potential future use).
+var (
+	_ = utf8.RuneLen
+	_ = unicode.IsLetter
+	_ = strings.Builder{}
+)
diff --git a/builtins/python/modules.go b/builtins/python/modules.go
new file mode 100644
index 00000000..af2cfb08
--- /dev/null
+++ b/builtins/python/modules.go
@@ -0,0 +1,802 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2026-present Datadog, Inc.
+
+package python
+
+import (
+	"bufio"
+	"encoding/base64"
+	"encoding/hex"
+	"fmt"
+	"hash/crc32"
+	"math"
+	"math/big"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+)
+
+// ---- Module registry ----
+
+// moduleFactory builds a module instance for one interpreter run.
+type moduleFactory func(opts *RunOpts) *PyModule
+
+// moduleRegistry maps an importable module name to its factory. It is
+// populated by the init functions in this file.
+var moduleRegistry map[string]moduleFactory
+
+func init() {
+	moduleRegistry = map[string]moduleFactory{
+		"sys":      makeSysModule,
+		"math":     makeMathModule,
+		"os":       makeOsModule,
+		"binascii": makeBinasciModule,
+		"string":   makeStringModule,
+	}
+
+	// Blocked modules.
+	// re is not implemented; block it so `import re` raises ImportError and
+	// `try: import re except ImportError: ...` works correctly.
+	blocked := []string{
+		"tempfile", "glob", "subprocess", "socket", "ctypes",
+		"multiprocessing", "threading", "asyncio", "re",
+	}
+	for _, name := range blocked {
+		moduleRegistry[name] = makeBlockedModule(name)
+	}
+}
+
+// makeBlockedModule returns a factory that never yields a module: calling it
+// panics with an ImportError naming the module.
+func makeBlockedModule(name string) moduleFactory {
+	return func(_ *RunOpts) *PyModule {
+		panic(exceptionSignal{exc: newExceptionf(ExcImportError, "module %q is not available in this shell", name)})
+	}
+}
+
+// loadModule returns (module, found). Panics with ImportError if found but blocked.
+func loadModule(name string, opts *RunOpts) (*PyModule, bool) {
+	if factory, found := moduleRegistry[name]; found {
+		// The factory call is where a blocked module panics.
+		return factory(opts), true
+	}
+	return nil, false
+}
+
+// goosToSysPlatform converts a runtime.GOOS value to the string that Python's
+// sys.platform reports on each OS. This matches CPython behaviour.
+func goosToSysPlatform(goos string) string { + switch goos { + case "darwin": + return "darwin" + case "windows": + return "win32" + default: + return "linux" + } +} + +// ---- sys module ---- + +func makeSysModule(opts *RunOpts) *PyModule { + argv := make([]Object, 0, 1+len(opts.Args)) + argv = append(argv, pyStr(opts.SourceName)) + for _, a := range opts.Args { + argv = append(argv, pyStr(a)) + } + + sysMod := &PyModule{Name: "sys", Dict: map[string]Object{ + "argv": pyList(argv), + "stdout": &PyFile{w: opts.Stdout, name: ""}, + "stderr": &PyFile{w: opts.Stderr, name: ""}, + "stdin": nil, // set below + "version": pyStr("3.12.0 (rshell custom interpreter)"), + "version_info": pyTuple([]Object{pyInt(3), pyInt(12), pyInt(0), pyStr("final"), pyInt(0)}), + "platform": pyStr(goosToSysPlatform(runtime.GOOS)), + "path": pyList([]Object{}), + "modules": pyDict(), + "maxsize": pyInt(int64(^uint(0) >> 1)), + "exit": nil, // set below + "__name__": pyStr("sys"), + }} + + // stdin — reuse the persistent stdinReader from RunOpts so that input() and + // sys.stdin.read*()/readline() share one bufio.Reader and do not lose + // read-ahead bytes to each other. 
+ if opts.Stdin != nil && opts.stdinReader != nil { + sysMod.Dict["stdin"] = &PyFile{r: opts.stdinReader, name: ""} + } else { + sysMod.Dict["stdin"] = &PyFile{r: bufio.NewReader(strings.NewReader("")), name: ""} + } + + // sys.exit + sysMod.Dict["exit"] = makeBuiltin("exit", func(args []Object, kwargs map[string]Object) Object { + code := 0 + if len(args) > 0 { + switch v := args[0].(type) { + case *PyInt: + if n, ok := v.int64(); ok { + code = int(n) + } else { + code = 1 + } + case *PyNone: + code = 0 + case *PyBool: + if v.v { + code = 1 + } + default: + // Print message to stderr and exit 1 + fmt.Fprint(opts.Stderr, args[0].pyStr()+"\n") + code = 1 + } + } + panic(controlSignal{kind: ctrlSysExit, value: pyInt(int64(code))}) + }) + + return sysMod +} + +// ---- math module ---- + +func makeMathModule(_ *RunOpts) *PyModule { + wrapF := func(name string, fn func(float64) float64) *PyBuiltin { + return makeBuiltin(name, func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("%s() takes exactly 1 argument (%d given)", name, len(args)) + } + return pyFloat(fn(toFloat(args[0]))) + }) + } + + wrapF2 := func(name string, fn func(float64, float64) float64) *PyBuiltin { + return makeBuiltin(name, func(args []Object, _ map[string]Object) Object { + if len(args) != 2 { + raiseTypeError("%s() takes exactly 2 arguments (%d given)", name, len(args)) + } + return pyFloat(fn(toFloat(args[0]), toFloat(args[1]))) + }) + } + + return &PyModule{Name: "math", Dict: map[string]Object{ + "floor": makeBuiltin("floor", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("floor() takes exactly 1 argument") + } + return pyInt(int64(math.Floor(toFloat(args[0])))) + }), + "ceil": makeBuiltin("ceil", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("ceil() takes exactly 1 argument") + } + return pyInt(int64(math.Ceil(toFloat(args[0])))) + }), + "sqrt": wrapF("sqrt", math.Sqrt), + 
"log": makeBuiltin("log", mathLog), + "log2": wrapF("log2", math.Log2), + "log10": wrapF("log10", math.Log10), + "sin": wrapF("sin", math.Sin), + "cos": wrapF("cos", math.Cos), + "tan": wrapF("tan", math.Tan), + "asin": wrapF("asin", math.Asin), + "acos": wrapF("acos", math.Acos), + "atan": wrapF("atan", math.Atan), + "atan2": wrapF2("atan2", math.Atan2), + "exp": wrapF("exp", math.Exp), + "pow": wrapF2("pow", math.Pow), + "fabs": wrapF("fabs", math.Abs), + "isnan": makeBuiltin("isnan", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("isnan() takes exactly 1 argument") + } + return pyBool(math.IsNaN(toFloat(args[0]))) + }), + "isinf": makeBuiltin("isinf", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("isinf() takes exactly 1 argument") + } + return pyBool(math.IsInf(toFloat(args[0]), 0)) + }), + "isfinite": makeBuiltin("isfinite", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("isfinite() takes exactly 1 argument") + } + f := toFloat(args[0]) + return pyBool(!math.IsNaN(f) && !math.IsInf(f, 0)) + }), + "trunc": makeBuiltin("trunc", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("trunc() takes exactly 1 argument") + } + return pyInt(int64(math.Trunc(toFloat(args[0])))) + }), + "gcd": makeBuiltin("gcd", mathGcd), + "factorial": makeBuiltin("factorial", mathFactorial), + "hypot": wrapF2("hypot", math.Hypot), + "degrees": wrapF("degrees", func(r float64) float64 { + return r * 180 / math.Pi + }), + "radians": wrapF("radians", func(d float64) float64 { + return d * math.Pi / 180 + }), + "pi": pyFloat(math.Pi), + "e": pyFloat(math.E), + "tau": pyFloat(2 * math.Pi), + "inf": pyFloat(math.Inf(1)), + "nan": pyFloat(math.NaN()), + "fsum": makeBuiltin("fsum", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("fsum() takes exactly 1 argument") + } + items := 
collectIterable(args[0]) + sum := 0.0 + for _, item := range items { + sum += toFloat(item) + } + return pyFloat(sum) + }), + "comb": makeBuiltin("comb", func(args []Object, _ map[string]Object) Object { + if len(args) != 2 { + raiseTypeError("comb() takes exactly 2 arguments") + } + n := toIntVal(args[0]) + k := toIntVal(args[1]) + if k < 0 || k > n { + return pyInt(0) + } + if k > 10000 { + raiseValueError("math.comb argument is too large") + } + // C(n, k) = n! / (k! * (n-k)!) + result := big.NewInt(1) + for i := int64(0); i < k; i++ { + result.Mul(result, big.NewInt(n-i)) + result.Div(result, big.NewInt(i+1)) + } + return pyIntBig(result) + }), + "perm": makeBuiltin("perm", func(args []Object, _ map[string]Object) Object { + if len(args) < 1 || len(args) > 2 { + raiseTypeError("perm() takes 1 or 2 arguments") + } + n := toIntVal(args[0]) + k := n + if len(args) == 2 && args[1] != pyNone { + k = toIntVal(args[1]) + } + if k < 0 || k > n { + return pyInt(0) + } + if k > 10000 { + raiseValueError("math.perm argument is too large") + } + result := big.NewInt(1) + for i := int64(0); i < k; i++ { + result.Mul(result, big.NewInt(n-i)) + } + return pyIntBig(result) + }), + }} +} + +func mathLog(args []Object, _ map[string]Object) Object { + if len(args) < 1 || len(args) > 2 { + raiseTypeError("log() takes 1 or 2 arguments (%d given)", len(args)) + } + x := toFloat(args[0]) + if len(args) == 1 { + return pyFloat(math.Log(x)) + } + base := toFloat(args[1]) + return pyFloat(math.Log(x) / math.Log(base)) +} + +func mathGcd(args []Object, _ map[string]Object) Object { + if len(args) < 2 { + raiseTypeError("gcd() takes at least 2 arguments") + } + a := new(big.Int).Abs(toIntValObj(args[0])) + for _, arg := range args[1:] { + b := new(big.Int).Abs(toIntValObj(arg)) + a.GCD(nil, nil, a, b) + } + return pyIntBig(a) +} + +func mathFactorial(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("factorial() takes exactly 1 argument") + } + n := 
toIntVal(args[0]) + if n < 0 { + raiseValueError("factorial() not defined for negative values") + } + if n > 10000 { + raiseValueError("factorial() argument is too large") + } + result := big.NewInt(1) + for i := int64(2); i <= n; i++ { + result.Mul(result, big.NewInt(i)) + } + return pyIntBig(result) +} + +// ---- os module ---- + +func makeOsModule(opts *RunOpts) *PyModule { + osPath := makeOsPathModule(opts) + + linesep := "\n" + osName := "posix" + if runtime.GOOS == "windows" { + linesep = "\r\n" + osName = "nt" + } + + return &PyModule{Name: "os", Dict: map[string]Object{ + "path": osPath, + // os.environ is an empty dict. Python code may write to it (the writes are + // in-memory only and do not affect the host process). This is intentional: + // os.getenv() always returns the default to prevent host env leakage. + "environ": pyDict(), + "getenv": makeBuiltin("getenv", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("getenv() missing required argument: 'key'") + } + // Always return the default — Python must not access the host process environment. + if len(args) >= 2 { + return args[1] + } + return pyNone + }), + "listdir": makeBuiltin("listdir", func(args []Object, _ map[string]Object) Object { + dir := "." 
+ if len(args) > 0 { + dir = mustStr(args[0], "listdir") + } + entries, err := opts.ReadDir(opts.Ctx, dir) + if err != nil { + raiseOSError(err.Error()) + } + items := make([]Object, len(entries)) + for i, e := range entries { + items[i] = pyStr(e.Name()) + } + return pyList(items) + }), + "sep": pyStr(string(filepath.Separator)), + "linesep": pyStr(linesep), + "curdir": pyStr("."), + "pardir": pyStr(".."), + "name": pyStr(osName), + "devnull": pyStr(os.DevNull), + "error": ExcOSError, + // Dangerous functions intentionally absent + }} +} + +func makeOsPathModule(opts *RunOpts) *PyModule { + return &PyModule{Name: "os.path", Dict: map[string]Object{ + "join": makeBuiltin("join", osPathJoin), + "exists": makeBuiltin("exists", makeOsPathExists(opts)), + "isfile": makeBuiltin("isfile", makeOsPathIsFile(opts)), + "isdir": makeBuiltin("isdir", makeOsPathIsDir(opts)), + "dirname": makeBuiltin("dirname", osPathDirname), + "basename": makeBuiltin("basename", osPathBasename), + "splitext": makeBuiltin("splitext", osPathSplitext), + "split": makeBuiltin("split", osPathSplit), + "sep": pyStr(string(filepath.Separator)), + "curdir": pyStr("."), + "pardir": pyStr(".."), + "extsep": pyStr("."), + "pathsep": pyStr(string(filepath.ListSeparator)), + "normpath": makeBuiltin("normpath", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("normpath() takes exactly 1 argument") + } + return pyStr(filepath.Clean(mustStr(args[0], "normpath"))) + }), + // abspath and realpath are intentionally absent: both call filepath.Abs + // which reads the host process CWD via os.Getwd, leaking the host path. + // This matches the policy that blocked os.getcwd() (commit f5235f88). 
+ }} +} + +func osPathJoin(args []Object, _ map[string]Object) Object { + if len(args) == 0 { + raiseTypeError("join() requires at least 1 argument") + } + parts := make([]string, len(args)) + for i, arg := range args { + parts[i] = mustStr(arg, "join") + } + return pyStr(filepath.Join(parts...)) +} + +func makeOsPathExists(opts *RunOpts) func([]Object, map[string]Object) Object { + return func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("exists() takes exactly 1 argument") + } + path := mustStr(args[0], "exists") + _, err := opts.Stat(opts.Ctx, path) + return pyBool(err == nil) + } +} + +func makeOsPathIsFile(opts *RunOpts) func([]Object, map[string]Object) Object { + return func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("isfile() takes exactly 1 argument") + } + path := mustStr(args[0], "isfile") + info, err := opts.Stat(opts.Ctx, path) + if err != nil { + return pyFalse + } + return pyBool(!info.IsDir()) + } +} + +func makeOsPathIsDir(opts *RunOpts) func([]Object, map[string]Object) Object { + return func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("isdir() takes exactly 1 argument") + } + path := mustStr(args[0], "isdir") + info, err := opts.Stat(opts.Ctx, path) + if err != nil { + return pyFalse + } + return pyBool(info.IsDir()) + } +} + +func osPathDirname(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("dirname() takes exactly 1 argument") + } + return pyStr(filepath.Dir(mustStr(args[0], "dirname"))) +} + +func osPathBasename(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("basename() takes exactly 1 argument") + } + return pyStr(filepath.Base(mustStr(args[0], "basename"))) +} + +func osPathSplitext(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("splitext() takes exactly 1 argument") + } + p := mustStr(args[0], "splitext") + ext := 
filepath.Ext(p) + base := p[:len(p)-len(ext)] + return pyTuple([]Object{pyStr(base), pyStr(ext)}) +} + +func osPathSplit(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("split() takes exactly 1 argument") + } + p := mustStr(args[0], "split") + dir := filepath.Dir(p) + base := filepath.Base(p) + return pyTuple([]Object{pyStr(dir), pyStr(base)}) +} + +// ---- binascii module ---- + +func makeBinasciModule(_ *RunOpts) *PyModule { + return &PyModule{Name: "binascii", Dict: map[string]Object{ + "hexlify": makeBuiltin("hexlify", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("hexlify() takes exactly 1 argument") + } + b := mustBytes(args[0], "hexlify") + return pyBytes([]byte(hex.EncodeToString(b))) + }), + "unhexlify": makeBuiltin("unhexlify", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("unhexlify() takes exactly 1 argument") + } + var s string + switch v := args[0].(type) { + case *PyStr: + s = v.v + case *PyBytes: + s = string(v.v) + default: + raiseTypeError("unhexlify() argument must be str or bytes-like object") + } + b, err := hex.DecodeString(s) + if err != nil { + raiseValueError("Non-hexadecimal digit found") + } + return pyBytes(b) + }), + "b2a_base64": makeBuiltin("b2a_base64", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("b2a_base64() takes exactly 1 argument") + } + b := mustBytes(args[0], "b2a_base64") + encoded := base64.StdEncoding.EncodeToString(b) + "\n" + return pyBytes([]byte(encoded)) + }), + "a2b_base64": makeBuiltin("a2b_base64", func(args []Object, _ map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("a2b_base64() takes exactly 1 argument") + } + var s string + switch v := args[0].(type) { + case *PyStr: + s = v.v + case *PyBytes: + s = string(v.v) + default: + raiseTypeError("a2b_base64() argument must be str or bytes-like object") + } + s = strings.TrimSpace(s) + 
b, err := base64.StdEncoding.DecodeString(s) + if err != nil { + b, err = base64.RawStdEncoding.DecodeString(s) + if err != nil { + raiseValueError("Invalid base64-encoded string: %v", err) + } + } + return pyBytes(b) + }), + "crc32": makeBuiltin("crc32", func(args []Object, _ map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("crc32() takes at least 1 argument") + } + b := mustBytes(args[0], "crc32") + var init uint32 + if len(args) > 1 { + init = uint32(toIntVal(args[1])) + } + checksum := crc32.Update(init, crc32.IEEETable, b) + return pyInt(int64(checksum)) + }), + "Error": ExcValueError, // binascii.Error = ValueError (CPython) + }} +} + +// b2a_hex and a2b_hex are aliases +func init() { + // These aliases need the registry to exist, so we add them in init after makeBinasciModule is available +} + +// ---- string module ---- + +func makeStringModule(_ *RunOpts) *PyModule { + printable := "" + for i := 32; i < 127; i++ { + printable += string(rune(i)) + } + + return &PyModule{Name: "string", Dict: map[string]Object{ + "ascii_letters": pyStr("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), + "ascii_lowercase": pyStr("abcdefghijklmnopqrstuvwxyz"), + "ascii_uppercase": pyStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), + "digits": pyStr("0123456789"), + "hexdigits": pyStr("0123456789abcdefABCDEF"), + "octdigits": pyStr("01234567"), + "punctuation": pyStr(`!"#$%&'()*+,-./:;<=>?@[\]^_` + "`{|}~"), + "whitespace": pyStr(" \t\n\r\x0b\x0c"), + "printable": pyStr(printable), + "Formatter": makeBuiltin("Formatter", func(args []Object, kwargs map[string]Object) Object { + raiseTypeError("string.Formatter is not implemented in this shell") + return nil + }), + "Template": makeBuiltin("Template", func(args []Object, kwargs map[string]Object) Object { + raiseTypeError("string.Template is not implemented in this shell") + return nil + }), + "capwords": makeBuiltin("capwords", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + 
raiseTypeError("capwords() requires at least 1 argument") + } + s := mustStr(args[0], "capwords") + sep := " " + if len(args) > 1 && args[1] != pyNone { + sep = mustStr(args[1], "capwords") + } + words := strings.Split(s, sep) + for i, w := range words { + if len(w) > 0 { + words[i] = strings.ToUpper(w[:1]) + strings.ToLower(w[1:]) + } + } + return pyStr(strings.Join(words, sep)) + }), + }} +} + +// ---- Helper functions ---- + +// toFloat converts an Object to a float64. +func toFloat(obj Object) float64 { + switch v := obj.(type) { + case *PyFloat: + return v.v + case *PyInt: + if n, ok := v.int64(); ok { + return float64(n) + } + f, _ := new(big.Float).SetInt(v.big).Float64() + return f + case *PyBool: + if v.v { + return 1 + } + return 0 + } + raiseTypeError("must be real number, not '%s'", obj.pyType().Name) + return 0 +} + +// mustBytes extracts bytes from an Object or raises TypeError. +func mustBytes(obj Object, fnName string) []byte { + switch v := obj.(type) { + case *PyBytes: + return v.v + } + raiseTypeError("%s() argument must be bytes-like object, not '%s'", fnName, obj.pyType().Name) + return nil +} + +// raiseOSError panics with an OSError. +func raiseOSError(msg string) { + panic(exceptionSignal{exc: newExceptionf(ExcOSError, "%s", msg)}) +} + +// ---- json module (stub) ---- + +func makeJsonModule(_ *RunOpts) *PyModule { + return &PyModule{Name: "json", Dict: map[string]Object{ + "dumps": makeBuiltin("dumps", func(args []Object, _ map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("dumps() requires at least 1 argument") + } + return pyStr(jsonDumps(args[0])) + }), + "loads": makeBuiltin("loads", func(args []Object, _ map[string]Object) Object { + // json.loads() is not implemented. Raise ValueError (matching CPython's + // json.JSONDecodeError which is a subclass of ValueError) so callers + // using `except ValueError` can handle the error correctly. 
+ panic(exceptionSignal{exc: newExceptionf(ExcValueError, "json.loads() is not implemented in this shell")}) + return nil + }), + }} +} + +// jsonDumps converts a Python object to a JSON string. +func jsonDumps(obj Object) string { + switch v := obj.(type) { + case *PyNone: + return "null" + case *PyBool: + if v.v { + return "true" + } + return "false" + case *PyInt: + return v.pyRepr() + case *PyFloat: + return v.pyRepr() + case *PyStr: + // Basic JSON string escaping + var b strings.Builder + b.WriteByte('"') + for _, r := range v.v { + switch r { + case '"': + b.WriteString(`\"`) + case '\\': + b.WriteString(`\\`) + case '\n': + b.WriteString(`\n`) + case '\r': + b.WriteString(`\r`) + case '\t': + b.WriteString(`\t`) + default: + b.WriteRune(r) + } + } + b.WriteByte('"') + return b.String() + case *PyList: + parts := make([]string, len(v.items)) + for i, item := range v.items { + parts[i] = jsonDumps(item) + } + return "[" + strings.Join(parts, ", ") + "]" + case *PyTuple: + parts := make([]string, len(v.items)) + for i, item := range v.items { + parts[i] = jsonDumps(item) + } + return "[" + strings.Join(parts, ", ") + "]" + case *PyDict: + parts := make([]string, len(v.keys)) + for i, k := range v.keys { + parts[i] = jsonDumps(k) + ": " + jsonDumps(v.vals[i]) + } + return "{" + strings.Join(parts, ", ") + "}" + } + return "null" +} + +func init() { + // Add extra modules to the registry + moduleRegistry["json"] = makeJsonModule + moduleRegistry["collections"] = makeCollectionsModule +} + +func makeCollectionsModule(_ *RunOpts) *PyModule { + return &PyModule{Name: "collections", Dict: map[string]Object{ + "OrderedDict": makeBuiltin("OrderedDict", func(args []Object, kwargs map[string]Object) Object { + // OrderedDict is essentially a regular dict (which is already ordered in Python 3.7+) + d := pyDict() + if len(args) > 0 { + if other, ok := args[0].(*PyDict); ok { + for i, k := range other.keys { + d.set(k, other.vals[i]) + } + } + } + for k, v := range kwargs 
{ + d.set(pyStr(k), v) + } + return d + }), + "defaultdict": makeBuiltin("defaultdict", func(args []Object, kwargs map[string]Object) Object { + // If a non-None default_factory is provided, raise NotImplementedError + // to avoid silent data corruption (KeyError would occur instead of + // auto-default behaviour, which is confusing and hard to debug). + if len(args) > 0 && args[0] != pyNone { + panic(exceptionSignal{exc: newExceptionf(ExcNotImplementedError, "collections.defaultdict default_factory is not implemented in this shell")}) + } + return pyDict() + }), + "namedtuple": makeBuiltin("namedtuple", func(args []Object, kwargs map[string]Object) Object { + raiseTypeError("collections.namedtuple is not implemented in this shell") + return nil + }), + "Counter": makeBuiltin("Counter", func(args []Object, kwargs map[string]Object) Object { + d := pyDict() + if len(args) > 0 { + items := collectIterable(args[0]) + for _, item := range items { + existing, ok := d.get(item) + if !ok { + d.set(item, pyInt(1)) + } else if n, ok2 := existing.(*PyInt); ok2 { + val, _ := n.int64() + d.set(item, pyInt(val+1)) + } + } + } + return d + }), + "deque": makeBuiltin("deque", func(args []Object, kwargs map[string]Object) Object { + // Simplified: return a regular list + if len(args) > 0 { + items := collectIterable(args[0]) + return pyList(items) + } + return pyList(nil) + }), + }} +} + +// ---- Formatting helpers for fmt.Fprintf using %v ---- + +func init() { + // Ensure the fmt package is used + _ = fmt.Sprintf +} diff --git a/builtins/python/parse_test.go b/builtins/python/parse_test.go new file mode 100644 index 00000000..b143bd18 --- /dev/null +++ b/builtins/python/parse_test.go @@ -0,0 +1,32 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +package python + +import ( + "testing" +) + +func TestParseListComp(t *testing.T) { + tests := []struct { + name string + src string + }{ + {"basic_nl", "[x * x for x in range(5)]\n"}, + {"basic", "[x * x for x in range(5)]"}, + {"semi", "squares = [x * x for x in range(5)]; print(squares)"}, + {"semi_nl", "squares = [x * x for x in range(5)]; print(squares)\n"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := Parse(tt.src, "") + if err != nil { + t.Logf("parse error: %v", err) + } else { + t.Logf("ok") + } + }) + } +} diff --git a/builtins/python/parser.go b/builtins/python/parser.go new file mode 100644 index 00000000..3eecdaf5 --- /dev/null +++ b/builtins/python/parser.go @@ -0,0 +1,2349 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package python + +import ( + "fmt" + "math" + "math/big" + "strconv" + "strings" + "unicode/utf8" +) + +// SyntaxError is returned for parse errors. +type SyntaxError struct { + Msg string + Pos Pos + File string +} + +func (e *SyntaxError) Error() string { + return fmt.Sprintf("%s:%d:%d: SyntaxError: %s", e.File, e.Pos.Line, e.Pos.Col, e.Msg) +} + +// Parse parses Python source code and returns the module AST or a SyntaxError. +func Parse(src, filename string) (*Module, error) { + p := &Parser{ + lex: NewLexer(src), + filename: filename, + } + // Prime the two-token lookahead. + p.cur = p.lex.Next() + p.peek = p.lex.Next() + + return p.parseModule() +} + +// Parser is a recursive-descent Python parser. +type Parser struct { + lex *Lexer + cur Token + peek Token + filename string +} + +// advance consumes the current token, shifting the lookahead window. 
+func (p *Parser) advance() Token { + t := p.cur + p.cur = p.peek + p.peek = p.lex.Next() + return t +} + +// expect asserts that the current token matches and advances. +func (p *Parser) expect(kind TokenKind, value string) (Token, error) { + if p.cur.Kind != kind || (value != "" && p.cur.Value != value) { + return Token{}, p.syntaxErrorf("expected %s %q, got %s %q", + tokenKindString(kind), value, tokenKindString(p.cur.Kind), p.cur.Value) + } + return p.advance(), nil +} + +// check returns true if the current token matches without consuming. +func (p *Parser) check(kind TokenKind, value string) bool { + return p.cur.Kind == kind && (value == "" || p.cur.Value == value) +} + +// match consumes and returns true if the current token matches. +func (p *Parser) match(kind TokenKind, value string) bool { + if p.check(kind, value) { + p.advance() + return true + } + return false +} + +// syntaxErrorf creates a SyntaxError at the current position. +func (p *Parser) syntaxErrorf(format string, args ...interface{}) *SyntaxError { + return &SyntaxError{ + Msg: fmt.Sprintf(format, args...), + Pos: p.cur.Pos, + File: p.filename, + } +} + +// syntaxErrorAt creates a SyntaxError at a specific position. +func (p *Parser) syntaxErrorAt(pos Pos, format string, args ...interface{}) *SyntaxError { + return &SyntaxError{ + Msg: fmt.Sprintf(format, args...), + Pos: pos, + File: p.filename, + } +} + +// skipNewlines skips over any newline tokens. +func (p *Parser) skipNewlines() { + for p.cur.Kind == TokNewline { + p.advance() + } +} + +// ---- Top-level parsing ---- + +func (p *Parser) parseModule() (*Module, error) { + mod := &Module{Pos: p.cur.Pos} + p.skipNewlines() + for p.cur.Kind != TokEOF { + stmts, err := p.parseStmt() + if err != nil { + return nil, err + } + mod.Body = append(mod.Body, stmts...) + p.skipNewlines() + } + return mod, nil +} + +// parseStmtList parses an indented block: INDENT stmts DEDENT. 
+func (p *Parser) parseStmtList() ([]Stmt, error) { + if _, err := p.expect(TokIndent, ""); err != nil { + return nil, err + } + var stmts []Stmt + p.skipNewlines() + for p.cur.Kind != TokDedent && p.cur.Kind != TokEOF { + ss, err := p.parseStmt() + if err != nil { + return nil, err + } + stmts = append(stmts, ss...) + p.skipNewlines() + } + if p.cur.Kind == TokDedent { + p.advance() + } + return stmts, nil +} + +// parseStmt dispatches to the appropriate statement parser. +func (p *Parser) parseStmt() ([]Stmt, error) { + // Handle decorators. + if p.check(TokOp, "@") { + s, err := p.parseDecorated() + if err != nil { + return nil, err + } + return []Stmt{s}, nil + } + + if p.cur.Kind == TokName { + switch p.cur.Value { + case "if": + s, err := p.parseIfProper() + if err != nil { + return nil, err + } + return []Stmt{s}, nil + case "while": + s, err := p.parseWhile() + if err != nil { + return nil, err + } + return []Stmt{s}, nil + case "for": + s, err := p.parseFor() + if err != nil { + return nil, err + } + return []Stmt{s}, nil + case "def": + s, err := p.parseFuncDef(nil) + if err != nil { + return nil, err + } + return []Stmt{s}, nil + case "async": + // async def or async for — treat async def as regular def + if p.peek.Kind == TokName && p.peek.Value == "def" { + p.advance() // consume 'async' + s, err := p.parseFuncDef(nil) + if err != nil { + return nil, err + } + return []Stmt{s}, nil + } + if p.peek.Kind == TokName && p.peek.Value == "for" { + p.advance() // consume 'async' + s, err := p.parseFor() + if err != nil { + return nil, err + } + return []Stmt{s}, nil + } + // fall through to simple stmt + case "class": + s, err := p.parseClassDef(nil) + if err != nil { + return nil, err + } + return []Stmt{s}, nil + case "try": + s, err := p.parseTry() + if err != nil { + return nil, err + } + return []Stmt{s}, nil + case "with": + s, err := p.parseWith() + if err != nil { + return nil, err + } + return []Stmt{s}, nil + } + } + + return p.parseSimpleStmts() +} 
+ +// parseSimpleStmts parses one or more semicolon-separated simple statements +// terminated by a newline or EOF. +func (p *Parser) parseSimpleStmts() ([]Stmt, error) { + var stmts []Stmt + s, err := p.parseSimpleStmt() + if err != nil { + return nil, err + } + stmts = append(stmts, s) + for p.match(TokOp, ";") { + if p.cur.Kind == TokNewline || p.cur.Kind == TokEOF { + break + } + s, err = p.parseSimpleStmt() + if err != nil { + return nil, err + } + stmts = append(stmts, s) + } + // consume trailing newline + if p.cur.Kind == TokNewline { + p.advance() + } + return stmts, nil +} + +// parseSimpleStmt parses a single simple statement. +func (p *Parser) parseSimpleStmt() (Stmt, error) { + if p.cur.Kind != TokName { + return p.parseAssignOrExprStmt() + } + pos := p.cur.Pos + switch p.cur.Value { + case "return": + return p.parseReturn() + case "raise": + return p.parseRaise() + case "del": + return p.parseDel() + case "pass": + p.advance() + return &PassStmt{Pos: pos}, nil + case "break": + p.advance() + return &BreakStmt{Pos: pos}, nil + case "continue": + p.advance() + return &ContinueStmt{Pos: pos}, nil + case "global": + return p.parseGlobal() + case "nonlocal": + return p.parseNonlocal() + case "assert": + return p.parseAssert() + case "import": + return p.parseImport() + case "from": + return p.parseFromImport() + case "yield": + e, err := p.parseYieldExpr() + if err != nil { + return nil, err + } + return &ExprStmt{Pos: pos, Value: e}, nil + } + return p.parseAssignOrExprStmt() +} + +// ---- Compound statements ---- + +func (p *Parser) parseIfProper() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'if' + test, err := p.parseExpr() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + body, err := p.parseSuite() + if err != nil { + return nil, err + } + + outer := &IfStmt{Pos: pos, Test: test, Body: body} + current := outer + + for p.cur.Kind == TokName && p.cur.Value == "elif" { + 
elifPos := p.cur.Pos + p.advance() + elifTest, err := p.parseExpr() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + elifBody, err := p.parseSuite() + if err != nil { + return nil, err + } + nested := &IfStmt{Pos: elifPos, Test: elifTest, Body: elifBody} + current.Orelse = []Stmt{nested} + current = nested + } + + if p.cur.Kind == TokName && p.cur.Value == "else" { + p.advance() + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + elseBody, err := p.parseSuite() + if err != nil { + return nil, err + } + current.Orelse = elseBody + } + + return outer, nil +} + +func (p *Parser) parseWhile() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'while' + test, err := p.parseExpr() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + body, err := p.parseSuite() + if err != nil { + return nil, err + } + s := &WhileStmt{Pos: pos, Test: test, Body: body} + if p.cur.Kind == TokName && p.cur.Value == "else" { + p.advance() + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + orelse, err := p.parseSuite() + if err != nil { + return nil, err + } + s.Orelse = orelse + } + return s, nil +} + +func (p *Parser) parseFor() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'for' + target, err := p.parseTargetList() + if err != nil { + return nil, err + } + if _, err := p.expect(TokName, "in"); err != nil { + return nil, err + } + iter, err := p.parseTestList() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + body, err := p.parseSuite() + if err != nil { + return nil, err + } + s := &ForStmt{Pos: pos, Target: target, Iter: iter, Body: body} + if p.cur.Kind == TokName && p.cur.Value == "else" { + p.advance() + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + orelse, err := p.parseSuite() + if err != nil { + return 
nil, err + } + s.Orelse = orelse + } + return s, nil +} + +func (p *Parser) parseFuncDef(decorators []Expr) (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'def' + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, "("); err != nil { + return nil, err + } + args, err := p.parseFuncArgs() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, ")"); err != nil { + return nil, err + } + // Optional return annotation: -> expr + if p.match(TokOp, "->") { + _, err = p.parseExpr() // discard annotation + if err != nil { + return nil, err + } + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + body, err := p.parseSuite() + if err != nil { + return nil, err + } + isGen := containsYield(body) + return &FuncDef{ + Pos: pos, + Name: nameTok.Value, + Args: args, + Body: body, + Decorators: decorators, + IsGen: isGen, + }, nil +} + +func (p *Parser) parseClassDef(decorators []Expr) (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'class' + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + var bases []Expr + if p.match(TokOp, "(") { + if !p.check(TokOp, ")") { + bases, err = p.parseArgList() + if err != nil { + return nil, err + } + } + if _, err := p.expect(TokOp, ")"); err != nil { + return nil, err + } + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + body, err := p.parseSuite() + if err != nil { + return nil, err + } + return &ClassDef{ + Pos: pos, + Name: nameTok.Value, + Bases: bases, + Body: body, + Decorators: decorators, + }, nil +} + +func (p *Parser) parseTry() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'try' + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + body, err := p.parseSuite() + if err != nil { + return nil, err + } + + var handlers []*ExceptHandler + var orelse, finally []Stmt + + // except clauses + for p.cur.Kind == TokName && p.cur.Value 
== "except" { + hPos := p.cur.Pos + p.advance() + h := &ExceptHandler{Pos: hPos} + if !p.check(TokOp, ":") { + h.Type, err = p.parseExpr() + if err != nil { + return nil, err + } + if p.match(TokName, "as") { + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + h.Name = nameTok.Value + } + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + h.Body, err = p.parseSuite() + if err != nil { + return nil, err + } + handlers = append(handlers, h) + } + + if p.cur.Kind == TokName && p.cur.Value == "else" { + p.advance() + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + orelse, err = p.parseSuite() + if err != nil { + return nil, err + } + } + + if p.cur.Kind == TokName && p.cur.Value == "finally" { + p.advance() + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + finally, err = p.parseSuite() + if err != nil { + return nil, err + } + } + + if len(handlers) == 0 && len(finally) == 0 { + return nil, p.syntaxErrorAt(pos, "try statement must have at least one except or finally clause") + } + + return &TryStmt{ + Pos: pos, + Body: body, + Handlers: handlers, + Orelse: orelse, + Finally: finally, + }, nil +} + +func (p *Parser) parseWith() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'with' + + var items []*WithItem + item, err := p.parseWithItem() + if err != nil { + return nil, err + } + items = append(items, item) + for p.match(TokOp, ",") { + item, err = p.parseWithItem() + if err != nil { + return nil, err + } + items = append(items, item) + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + body, err := p.parseSuite() + if err != nil { + return nil, err + } + return &WithStmt{Pos: pos, Items: items, Body: body}, nil +} + +func (p *Parser) parseWithItem() (*WithItem, error) { + ctx, err := p.parseExpr() + if err != nil { + return nil, err + } + item := &WithItem{CtxExpr: ctx} + if p.match(TokName, "as") { + optVar, err := p.parseExpr() 
+ if err != nil { + return nil, err + } + item.OptVar = optVar + } + return item, nil +} + +func (p *Parser) parseDecorated() (Stmt, error) { + var decorators []Expr + for p.check(TokOp, "@") { + p.advance() // consume '@' + dec, err := p.parseExpr() + if err != nil { + return nil, err + } + decorators = append(decorators, dec) + if p.cur.Kind == TokNewline { + p.advance() + } + p.skipNewlines() + } + if p.cur.Kind == TokName && p.cur.Value == "def" { + return p.parseFuncDef(decorators) + } + if p.cur.Kind == TokName && p.cur.Value == "async" { + if p.peek.Kind == TokName && p.peek.Value == "def" { + p.advance() // consume 'async' + return p.parseFuncDef(decorators) + } + } + if p.cur.Kind == TokName && p.cur.Value == "class" { + return p.parseClassDef(decorators) + } + return nil, p.syntaxErrorf("expected 'def' or 'class' after decorator") +} + +// parseSuite parses either an inline simple stmt list or an indented block. +func (p *Parser) parseSuite() ([]Stmt, error) { + if p.cur.Kind == TokNewline { + p.advance() + p.skipNewlines() + return p.parseStmtList() + } + // Inline suite. 
+ stmts, err := p.parseSimpleStmts() + if err != nil { + return nil, err + } + return stmts, nil +} + +// ---- Simple statements ---- + +func (p *Parser) parseReturn() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'return' + if p.cur.Kind == TokNewline || p.cur.Kind == TokEOF || p.check(TokOp, ";") { + return &ReturnStmt{Pos: pos}, nil + } + val, err := p.parseTestList() + if err != nil { + return nil, err + } + return &ReturnStmt{Pos: pos, Value: val}, nil +} + +func (p *Parser) parseRaise() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'raise' + if p.cur.Kind == TokNewline || p.cur.Kind == TokEOF || p.check(TokOp, ";") { + return &RaiseStmt{Pos: pos}, nil + } + exc, err := p.parseExpr() + if err != nil { + return nil, err + } + s := &RaiseStmt{Pos: pos, Exc: exc} + if p.match(TokName, "from") { + cause, err := p.parseExpr() + if err != nil { + return nil, err + } + s.Cause = cause + } + return s, nil +} + +func (p *Parser) parseDel() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'del' + targets, err := p.parseExprList() + if err != nil { + return nil, err + } + return &DelStmt{Pos: pos, Targets: targets}, nil +} + +func (p *Parser) parseGlobal() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'global' + var names []string + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + names = append(names, nameTok.Value) + for p.match(TokOp, ",") { + nameTok, err = p.expect(TokName, "") + if err != nil { + return nil, err + } + names = append(names, nameTok.Value) + } + return &GlobalStmt{Pos: pos, Names: names}, nil +} + +func (p *Parser) parseNonlocal() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'nonlocal' + var names []string + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + names = append(names, nameTok.Value) + for p.match(TokOp, ",") { + nameTok, err = p.expect(TokName, "") + if err != nil { + return nil, err + } + names = append(names, 
nameTok.Value) + } + return &NonlocalStmt{Pos: pos, Names: names}, nil +} + +func (p *Parser) parseAssert() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'assert' + test, err := p.parseExpr() + if err != nil { + return nil, err + } + s := &AssertStmt{Pos: pos, Test: test} + if p.match(TokOp, ",") { + msg, err := p.parseExpr() + if err != nil { + return nil, err + } + s.Msg = msg + } + return s, nil +} + +func (p *Parser) parseImport() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'import' + var names []ImportName + name, err := p.parseDottedName() + if err != nil { + return nil, err + } + alias := "" + if p.match(TokName, "as") { + aliasTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + alias = aliasTok.Value + } + names = append(names, ImportName{Name: name, Alias: alias}) + for p.match(TokOp, ",") { + name, err = p.parseDottedName() + if err != nil { + return nil, err + } + alias = "" + if p.match(TokName, "as") { + aliasTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + alias = aliasTok.Value + } + names = append(names, ImportName{Name: name, Alias: alias}) + } + return &ImportStmt{Pos: pos, Names: names}, nil +} + +func (p *Parser) parseFromImport() (Stmt, error) { + pos := p.cur.Pos + p.advance() // consume 'from' + + // Relative imports: leading dots. 
+ var dots strings.Builder + for p.check(TokOp, ".") || p.check(TokOp, "...") { + dots.WriteString(p.cur.Value) + p.advance() + } + + modName := "" + if p.cur.Kind == TokName && p.cur.Value != "import" { + var err error + modName, err = p.parseDottedName() + if err != nil { + return nil, err + } + } + module := dots.String() + modName + + if _, err := p.expect(TokName, "import"); err != nil { + return nil, err + } + + var names []ImportName + if p.check(TokOp, "*") { + p.advance() + names = []ImportName{{Name: "*"}} + } else if p.match(TokOp, "(") { + var err error + names, err = p.parseImportAsNames() + if err != nil { + return nil, err + } + p.match(TokOp, ",") // trailing comma + if _, err := p.expect(TokOp, ")"); err != nil { + return nil, err + } + } else { + var err error + names, err = p.parseImportAsNames() + if err != nil { + return nil, err + } + } + + return &ImportFromStmt{Pos: pos, Module: module, Names: names}, nil +} + +func (p *Parser) parseImportAsNames() ([]ImportName, error) { + var names []ImportName + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + alias := "" + if p.match(TokName, "as") { + aliasTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + alias = aliasTok.Value + } + names = append(names, ImportName{Name: nameTok.Value, Alias: alias}) + for p.match(TokOp, ",") { + if p.cur.Kind != TokName { + break + } + nameTok, err = p.expect(TokName, "") + if err != nil { + return nil, err + } + alias = "" + if p.match(TokName, "as") { + aliasTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + alias = aliasTok.Value + } + names = append(names, ImportName{Name: nameTok.Value, Alias: alias}) + } + return names, nil +} + +func (p *Parser) parseDottedName() (string, error) { + nameTok, err := p.expect(TokName, "") + if err != nil { + return "", err + } + name := nameTok.Value + for p.check(TokOp, ".") { + p.advance() + part, err := p.expect(TokName, "") + if err != nil { + 
return "", err + } + name += "." + part.Value + } + return name, nil +} + +// parseAssignOrExprStmt handles assignments and expression statements. +func (p *Parser) parseAssignOrExprStmt() (Stmt, error) { + pos := p.cur.Pos + + // Parse the first expression (possibly a comma-separated tuple). + first, err := p.parseTestlistStarExpr() + if err != nil { + return nil, err + } + + // Augmented assignment? + if isAugOp(p.cur) { + op := p.cur.Value + p.advance() + rhs, err := p.parseTestList() + if err != nil { + return nil, err + } + return &AugAssignStmt{Pos: pos, Target: first, Op: op, Value: rhs}, nil + } + + // Annotated assignment? + if p.check(TokOp, ":") { + p.advance() + ann, err := p.parseExpr() + if err != nil { + return nil, err + } + s := &AnnAssignStmt{Pos: pos, Target: first, Annotation: ann} + if p.match(TokOp, "=") { + val, err := p.parseTestList() + if err != nil { + return nil, err + } + s.Value = val + } + return s, nil + } + + // Regular assignment chain: a = b = c + if p.check(TokOp, "=") { + targets := []Expr{first} + for p.match(TokOp, "=") { + var rhs Expr + // Allow yield/yield from on the RHS of assignment. + if p.cur.Kind == TokName && p.cur.Value == "yield" { + rhs, err = p.parseYieldExpr() + } else { + rhs, err = p.parseTestlistStarExpr() + } + if err != nil { + return nil, err + } + targets = append(targets, rhs) + } + // Last element is the value. + value := targets[len(targets)-1] + return &AssignStmt{Pos: pos, Targets: targets[:len(targets)-1], Value: value}, nil + } + + return &ExprStmt{Pos: pos, Value: first}, nil +} + +func isAugOp(t Token) bool { + if t.Kind != TokOp { + return false + } + switch t.Value { + case "+=", "-=", "*=", "/=", "//=", "%=", "**=", "&=", "|=", "^=", "<<=", ">>=", "@=": + return true + } + return false +} + +// ---- Function argument parsing ---- + +// parseFuncArgs parses the argument specification inside def f(...). 
+func (p *Parser) parseFuncArgs() (*Arguments, error) { + args := &Arguments{} + + afterStar := false + bareStarSeen := false + + for !p.check(TokOp, ")") && p.cur.Kind != TokEOF { + p.skipNewlines() + if p.check(TokOp, ")") { + break + } + + // **kwargs + if p.match(TokOp, "**") { + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + args.Kwarg = nameTok.Value + p.match(TokOp, ",") + break + } + + // *args or bare * + if p.match(TokOp, "*") { + if p.check(TokOp, ",") || p.check(TokOp, ")") { + // bare * + bareStarSeen = true + afterStar = true + } else { + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + args.Vararg = nameTok.Value + afterStar = true + } + _ = bareStarSeen + if p.check(TokOp, ",") { + p.advance() + continue + } + break + } + + // Regular arg or kwonly arg. + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + + // Optional type annotation. + if p.match(TokOp, ":") { + _, err = p.parseExpr() // discard annotation + if err != nil { + return nil, err + } + } + + var defaultVal Expr + if p.match(TokOp, "=") { + defaultVal, err = p.parseExpr() + if err != nil { + return nil, err + } + } + + if afterStar { + args.KwOnly = append(args.KwOnly, nameTok.Value) + args.KwDefaults = append(args.KwDefaults, defaultVal) + } else { + args.Args = append(args.Args, nameTok.Value) + args.Defaults = append(args.Defaults, defaultVal) + } + + if !p.match(TokOp, ",") { + break + } + } + return args, nil +} + +// ---- Expression parsing ---- + +// parseExpr parses a single expression (handles ternary). +func (p *Parser) parseExpr() (Expr, error) { + return p.parseTernary() +} + +// parseTernary: boolOr ('if' boolOr 'else' ternary)? 
+func (p *Parser) parseTernary() (Expr, error) { + body, err := p.parseBoolOr() + if err != nil { + return nil, err + } + if p.cur.Kind == TokName && p.cur.Value == "if" { + pos := p.cur.Pos + p.advance() + test, err := p.parseBoolOr() + if err != nil { + return nil, err + } + if _, err := p.expect(TokName, "else"); err != nil { + return nil, err + } + orelse, err := p.parseTernary() + if err != nil { + return nil, err + } + return &IfExp{Pos: pos, Test: test, Body: body, Orelse: orelse}, nil + } + return body, nil +} + +// parseBoolOr: boolAnd ('or' boolAnd)* +func (p *Parser) parseBoolOr() (Expr, error) { + left, err := p.parseBoolAnd() + if err != nil { + return nil, err + } + for p.cur.Kind == TokName && p.cur.Value == "or" { + pos := p.cur.Pos + p.advance() + right, err := p.parseBoolAnd() + if err != nil { + return nil, err + } + if bo, ok := left.(*BoolOp); ok && bo.Op == "or" { + bo.Values = append(bo.Values, right) + } else { + left = &BoolOp{Pos: pos, Op: "or", Values: []Expr{left, right}} + } + } + return left, nil +} + +// parseBoolAnd: boolNot ('and' boolNot)* +func (p *Parser) parseBoolAnd() (Expr, error) { + left, err := p.parseBoolNot() + if err != nil { + return nil, err + } + for p.cur.Kind == TokName && p.cur.Value == "and" { + pos := p.cur.Pos + p.advance() + right, err := p.parseBoolNot() + if err != nil { + return nil, err + } + if bo, ok := left.(*BoolOp); ok && bo.Op == "and" { + bo.Values = append(bo.Values, right) + } else { + left = &BoolOp{Pos: pos, Op: "and", Values: []Expr{left, right}} + } + } + return left, nil +} + +// parseBoolNot: 'not' boolNot | comparison +func (p *Parser) parseBoolNot() (Expr, error) { + if p.cur.Kind == TokName && p.cur.Value == "not" { + pos := p.cur.Pos + p.advance() + operand, err := p.parseBoolNot() + if err != nil { + return nil, err + } + return &UnaryOp{Pos: pos, Op: "not", Operand: operand}, nil + } + return p.parseComparison() +} + +// parseComparison: bitor (cmpop bitor)* +func (p *Parser) 
parseComparison() (Expr, error) { + left, err := p.parseBitOr() + if err != nil { + return nil, err + } + pos := p.cur.Pos + var ops []string + var comparators []Expr + + for { + op, ok := p.peekCmpOp() + if !ok { + break + } + right, err := p.parseBitOr() + if err != nil { + return nil, err + } + ops = append(ops, op) + comparators = append(comparators, right) + } + + if len(ops) == 0 { + return left, nil + } + return &Compare{Pos: pos, Left: left, Ops: ops, Comparators: comparators}, nil +} + +// peekCmpOp checks for a comparison operator and advances if found. +func (p *Parser) peekCmpOp() (string, bool) { + if p.cur.Kind == TokOp { + switch p.cur.Value { + case "==", "!=", "<", ">", "<=", ">=": + op := p.cur.Value + p.advance() + return op, true + } + } + if p.cur.Kind == TokName { + switch p.cur.Value { + case "in": + p.advance() + return "in", true + case "is": + p.advance() + if p.cur.Kind == TokName && p.cur.Value == "not" { + p.advance() + return "is not", true + } + return "is", true + case "not": + if p.peek.Kind == TokName && p.peek.Value == "in" { + p.advance() // consume 'not' + p.advance() // consume 'in' + return "not in", true + } + } + } + return "", false +} + +// parseBitOr: bitxor ('|' bitxor)* +func (p *Parser) parseBitOr() (Expr, error) { + left, err := p.parseBitXor() + if err != nil { + return nil, err + } + for p.check(TokOp, "|") { + pos := p.cur.Pos + p.advance() + right, err := p.parseBitXor() + if err != nil { + return nil, err + } + left = &BinOp{Pos: pos, Left: left, Right: right, Op: "|"} + } + return left, nil +} + +// parseBitXor: bitand ('^' bitand)* +func (p *Parser) parseBitXor() (Expr, error) { + left, err := p.parseBitAnd() + if err != nil { + return nil, err + } + for p.check(TokOp, "^") { + pos := p.cur.Pos + p.advance() + right, err := p.parseBitAnd() + if err != nil { + return nil, err + } + left = &BinOp{Pos: pos, Left: left, Right: right, Op: "^"} + } + return left, nil +} + +// parseBitAnd: shift ('&' shift)* +func (p 
*Parser) parseBitAnd() (Expr, error) { + left, err := p.parseShift() + if err != nil { + return nil, err + } + for p.check(TokOp, "&") { + pos := p.cur.Pos + p.advance() + right, err := p.parseShift() + if err != nil { + return nil, err + } + left = &BinOp{Pos: pos, Left: left, Right: right, Op: "&"} + } + return left, nil +} + +// parseShift: arith (('<<' | '>>') arith)* +func (p *Parser) parseShift() (Expr, error) { + left, err := p.parseArith() + if err != nil { + return nil, err + } + for p.cur.Kind == TokOp && (p.cur.Value == "<<" || p.cur.Value == ">>") { + op := p.cur.Value + pos := p.cur.Pos + p.advance() + right, err := p.parseArith() + if err != nil { + return nil, err + } + left = &BinOp{Pos: pos, Left: left, Right: right, Op: op} + } + return left, nil +} + +// parseArith: term (('+' | '-') term)* +func (p *Parser) parseArith() (Expr, error) { + left, err := p.parseTerm() + if err != nil { + return nil, err + } + for p.cur.Kind == TokOp && (p.cur.Value == "+" || p.cur.Value == "-") { + op := p.cur.Value + pos := p.cur.Pos + p.advance() + right, err := p.parseTerm() + if err != nil { + return nil, err + } + left = &BinOp{Pos: pos, Left: left, Right: right, Op: op} + } + return left, nil +} + +// parseTerm: factor (('*' | '/' | '//' | '%' | '@') factor)* +func (p *Parser) parseTerm() (Expr, error) { + left, err := p.parseFactor() + if err != nil { + return nil, err + } + for p.cur.Kind == TokOp { + op := p.cur.Value + if op != "*" && op != "/" && op != "//" && op != "%" && op != "@" { + break + } + pos := p.cur.Pos + p.advance() + right, err := p.parseFactor() + if err != nil { + return nil, err + } + left = &BinOp{Pos: pos, Left: left, Right: right, Op: op} + } + return left, nil +} + +// parseFactor: ('+' | '-' | '~') factor | power +func (p *Parser) parseFactor() (Expr, error) { + if p.cur.Kind == TokOp { + switch p.cur.Value { + case "+", "-", "~": + op := p.cur.Value + pos := p.cur.Pos + p.advance() + operand, err := p.parseFactor() + if err != nil { 
+ return nil, err + } + return &UnaryOp{Pos: pos, Op: op, Operand: operand}, nil + } + } + return p.parsePower() +} + +// parsePower: postfix ('**' factor)? (right-associative) +func (p *Parser) parsePower() (Expr, error) { + base, err := p.parseAwait() + if err != nil { + return nil, err + } + if p.check(TokOp, "**") { + pos := p.cur.Pos + p.advance() + exp, err := p.parseFactor() + if err != nil { + return nil, err + } + return &BinOp{Pos: pos, Left: base, Right: exp, Op: "**"}, nil + } + return base, nil +} + +// parseAwait: 'await' postfix | postfix +func (p *Parser) parseAwait() (Expr, error) { + if p.cur.Kind == TokName && p.cur.Value == "await" { + p.advance() // consume 'await' — treat as no-op for now + } + return p.parsePostfix() +} + +// parsePostfix: primary ('.' Name | '[' subscript ']' | '(' arglist ')')* +func (p *Parser) parsePostfix() (Expr, error) { + node, err := p.parsePrimary() + if err != nil { + return nil, err + } + + for { + if p.check(TokOp, ".") { + pos := p.cur.Pos + p.advance() + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + node = &AttributeExpr{Pos: pos, Value: node, Attr: nameTok.Value} + continue + } + if p.check(TokOp, "[") { + pos := p.cur.Pos + p.advance() + slice, err := p.parseSubscript() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, "]"); err != nil { + return nil, err + } + node = &SubscriptExpr{Pos: pos, Value: node, Slice: slice} + continue + } + if p.check(TokOp, "(") { + pos := p.cur.Pos + p.advance() + args, keywords, err := p.parseCallArgs() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, ")"); err != nil { + return nil, err + } + node = &CallExpr{Pos: pos, Func: node, Args: args, Keywords: keywords} + continue + } + break + } + return node, nil +} + +// parseSubscript parses a subscript expression (possibly a slice). +func (p *Parser) parseSubscript() (Expr, error) { + // Check for bare slice: :upper, ::step, etc. 
+ if p.check(TokOp, ":") { + return p.parseSliceSuffix(nil) + } + // Could be expr or slice starting with expr. + first, err := p.parseExpr() + if err != nil { + return nil, err + } + if p.check(TokOp, ":") { + return p.parseSliceSuffix(first) + } + // Tuple subscript: a[1, 2] + if p.check(TokOp, ",") { + elts := []Expr{first} + for p.match(TokOp, ",") { + if p.check(TokOp, "]") { + break + } + e, err := p.parseExpr() + if err != nil { + return nil, err + } + elts = append(elts, e) + } + if len(elts) == 1 { + return elts[0], nil + } + return &TupleExpr{Pos: first.nodePos(), Elts: elts}, nil + } + return first, nil +} + +func (p *Parser) parseSliceSuffix(lower Expr) (Expr, error) { + pos := p.cur.Pos + if lower != nil { + pos = lower.nodePos() + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + var upper Expr + if !p.check(TokOp, ":") && !p.check(TokOp, "]") && !p.check(TokOp, ",") { + var err error + upper, err = p.parseExpr() + if err != nil { + return nil, err + } + } + var step Expr + if p.match(TokOp, ":") { + if !p.check(TokOp, "]") && !p.check(TokOp, ",") { + var err error + step, err = p.parseExpr() + if err != nil { + return nil, err + } + } + } + return &SliceExpr{Pos: pos, Lower: lower, Upper: upper, Step: step}, nil +} + +// parsePrimary parses a primary expression. 
+func (p *Parser) parsePrimary() (Expr, error) { + pos := p.cur.Pos + + switch p.cur.Kind { + case TokName: + name := p.cur.Value + p.advance() + switch name { + case "True": + return &Constant{Pos: pos, Value: true}, nil + case "False": + return &Constant{Pos: pos, Value: false}, nil + case "None": + return &Constant{Pos: pos, Value: nil}, nil + case "lambda": + return p.parseLambda() + case "yield": + return p.parseYieldExpr() + } + return &NameExpr{Pos: pos, Id: name}, nil + + case TokInt: + val, err := parseIntLiteral(p.cur.Value) + if err != nil { + return nil, p.syntaxErrorf("invalid integer literal %q: %v", p.cur.Value, err) + } + p.advance() + return &Constant{Pos: pos, Value: val}, nil + + case TokFloat: + val, err := parseFloatLiteral(p.cur.Value) + if err != nil { + return nil, p.syntaxErrorf("invalid float literal %q: %v", p.cur.Value, err) + } + p.advance() + return &Constant{Pos: pos, Value: val}, nil + + case TokString: + // Adjacent string concatenation. + val := p.cur.Value + p.advance() + for p.cur.Kind == TokString { + val += p.cur.Value + p.advance() + } + return &Constant{Pos: pos, Value: val}, nil + + case TokBytes: + val := []byte(p.cur.Value) + p.advance() + for p.cur.Kind == TokBytes { + val = append(val, []byte(p.cur.Value)...) + p.advance() + } + return &Constant{Pos: pos, Value: val}, nil + + case TokOp: + switch p.cur.Value { + case "(": + return p.parseParenExpr() + case "[": + return p.parseListExpr() + case "{": + return p.parseDictOrSetExpr() + case "*": + // Starred expression in assignment target. + p.advance() + val, err := p.parseExpr() + if err != nil { + return nil, err + } + return &Starred{Pos: pos, Value: val}, nil + } + } + + return nil, p.syntaxErrorf("unexpected token %s %q", tokenKindString(p.cur.Kind), p.cur.Value) +} + +// parseParenExpr parses a parenthesized expression, tuple, generator, or yield. +func (p *Parser) parseParenExpr() (Expr, error) { + pos := p.cur.Pos + p.advance() // consume '(' + + // Empty tuple. 
+ if p.check(TokOp, ")") { + p.advance() + return &TupleExpr{Pos: pos, Elts: nil}, nil + } + + // yield expression inside parens. + if p.cur.Kind == TokName && p.cur.Value == "yield" { + e, err := p.parseYieldExpr() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, ")"); err != nil { + return nil, err + } + return e, nil + } + + first, err := p.parseExpr() + if err != nil { + return nil, err + } + + // Generator expression. + if p.cur.Kind == TokName && p.cur.Value == "for" { + gens, err := p.parseComprehensions() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, ")"); err != nil { + return nil, err + } + return &GeneratorExp{Pos: pos, Elt: first, Generators: gens}, nil + } + + // Tuple or single expression. + if p.check(TokOp, ",") { + elts := []Expr{first} + for p.match(TokOp, ",") { + if p.check(TokOp, ")") { + break + } + e, err := p.parseExpr() + if err != nil { + return nil, err + } + elts = append(elts, e) + } + if _, err := p.expect(TokOp, ")"); err != nil { + return nil, err + } + return &TupleExpr{Pos: pos, Elts: elts}, nil + } + + if _, err := p.expect(TokOp, ")"); err != nil { + return nil, err + } + return first, nil +} + +// parseListExpr parses a list literal or list comprehension. +func (p *Parser) parseListExpr() (Expr, error) { + pos := p.cur.Pos + p.advance() // consume '[' + + if p.check(TokOp, "]") { + p.advance() + return &ListExpr{Pos: pos, Elts: nil}, nil + } + + first, err := p.parseExpr() + if err != nil { + return nil, err + } + + // List comprehension. 
+ if p.cur.Kind == TokName && p.cur.Value == "for" { + gens, err := p.parseComprehensions() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, "]"); err != nil { + return nil, err + } + return &ListComp{Pos: pos, Elt: first, Generators: gens}, nil + } + + elts := []Expr{first} + for p.match(TokOp, ",") { + if p.check(TokOp, "]") { + break + } + e, err := p.parseExpr() + if err != nil { + return nil, err + } + elts = append(elts, e) + } + if _, err := p.expect(TokOp, "]"); err != nil { + return nil, err + } + return &ListExpr{Pos: pos, Elts: elts}, nil +} + +// parseDictOrSetExpr parses a dict literal, set literal, dict comp, or set comp. +func (p *Parser) parseDictOrSetExpr() (Expr, error) { + pos := p.cur.Pos + p.advance() // consume '{' + + if p.check(TokOp, "}") { + p.advance() + return &DictExpr{Pos: pos}, nil + } + + // **unpack at start means dict. + if p.check(TokOp, "**") { + p.advance() + val, err := p.parseExpr() + if err != nil { + return nil, err + } + keys := []Expr{nil} + vals := []Expr{val} + for p.match(TokOp, ",") { + if p.check(TokOp, "}") { + break + } + if p.match(TokOp, "**") { + v, err := p.parseExpr() + if err != nil { + return nil, err + } + keys = append(keys, nil) + vals = append(vals, v) + } else { + k, err := p.parseExpr() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + v, err := p.parseExpr() + if err != nil { + return nil, err + } + keys = append(keys, k) + vals = append(vals, v) + } + } + if _, err := p.expect(TokOp, "}"); err != nil { + return nil, err + } + return &DictExpr{Pos: pos, Keys: keys, Values: vals}, nil + } + + first, err := p.parseExpr() + if err != nil { + return nil, err + } + + // Dict (key: value) or dict comp. + if p.check(TokOp, ":") { + p.advance() + firstVal, err := p.parseExpr() + if err != nil { + return nil, err + } + + // Dict comprehension. 
+ if p.cur.Kind == TokName && p.cur.Value == "for" { + gens, err := p.parseComprehensions() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, "}"); err != nil { + return nil, err + } + return &DictComp{Pos: pos, Key: first, Value: firstVal, Generators: gens}, nil + } + + // Dict literal. + keys := []Expr{first} + vals := []Expr{firstVal} + for p.match(TokOp, ",") { + if p.check(TokOp, "}") { + break + } + if p.match(TokOp, "**") { + v, err := p.parseExpr() + if err != nil { + return nil, err + } + keys = append(keys, nil) + vals = append(vals, v) + continue + } + k, err := p.parseExpr() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + v, err := p.parseExpr() + if err != nil { + return nil, err + } + keys = append(keys, k) + vals = append(vals, v) + } + if _, err := p.expect(TokOp, "}"); err != nil { + return nil, err + } + return &DictExpr{Pos: pos, Keys: keys, Values: vals}, nil + } + + // Set or set comprehension. + if p.cur.Kind == TokName && p.cur.Value == "for" { + gens, err := p.parseComprehensions() + if err != nil { + return nil, err + } + if _, err := p.expect(TokOp, "}"); err != nil { + return nil, err + } + return &SetComp{Pos: pos, Elt: first, Generators: gens}, nil + } + + // Set literal. + elts := []Expr{first} + for p.match(TokOp, ",") { + if p.check(TokOp, "}") { + break + } + e, err := p.parseExpr() + if err != nil { + return nil, err + } + elts = append(elts, e) + } + if _, err := p.expect(TokOp, "}"); err != nil { + return nil, err + } + return &SetExpr{Pos: pos, Elts: elts}, nil +} + +// parseComprehensions parses one or more 'for target in iter (if cond)*' clauses. 
+func (p *Parser) parseComprehensions() ([]*Comprehension, error) { + var gens []*Comprehension + for p.cur.Kind == TokName && (p.cur.Value == "for" || p.cur.Value == "async") { + if p.cur.Value == "async" { + p.advance() // skip 'async' + } + if _, err := p.expect(TokName, "for"); err != nil { + return nil, err + } + target, err := p.parseTargetList() + if err != nil { + return nil, err + } + if _, err := p.expect(TokName, "in"); err != nil { + return nil, err + } + iter, err := p.parseBoolOr() // avoid consuming trailing 'for'/'if' + if err != nil { + return nil, err + } + // Handle comma-separated iterables: for x in a, b — wrap in tuple. + if p.check(TokOp, ",") { + iters := []Expr{iter} + for p.match(TokOp, ",") { + if p.cur.Kind == TokName && (p.cur.Value == "for" || p.cur.Value == "if" || p.cur.Value == "async") { + break + } + if p.check(TokOp, "]") || p.check(TokOp, ")") || p.check(TokOp, "}") { + break + } + e, err := p.parseBoolOr() + if err != nil { + return nil, err + } + iters = append(iters, e) + } + if len(iters) > 1 { + iter = &TupleExpr{Pos: iter.nodePos(), Elts: iters} + } + } + + comp := &Comprehension{Target: target, Iter: iter} + for p.cur.Kind == TokName && p.cur.Value == "if" { + p.advance() + cond, err := p.parseBoolNot() // if condition: no ternary + if err != nil { + return nil, err + } + comp.Ifs = append(comp.Ifs, cond) + } + gens = append(gens, comp) + } + return gens, nil +} + +// parseLambda parses a lambda expression (after 'lambda' has been consumed by parsePrimary). 
+func (p *Parser) parseLambda() (Expr, error) { + pos := p.cur.Pos + // parsePrimary already consumed 'lambda' + var args *Arguments + var err error + if !p.check(TokOp, ":") { + args, err = p.parseLambdaArgs() + if err != nil { + return nil, err + } + } else { + args = &Arguments{} + } + if _, err := p.expect(TokOp, ":"); err != nil { + return nil, err + } + body, err := p.parseTernary() + if err != nil { + return nil, err + } + return &Lambda{Pos: pos, Args: args, Body: body}, nil +} + +// parseLambdaArgs parses simplified lambda argument list (no annotations, no defaults… well, defaults yes). +func (p *Parser) parseLambdaArgs() (*Arguments, error) { + args := &Arguments{} + afterStar := false + for !p.check(TokOp, ":") && p.cur.Kind != TokEOF { + if p.match(TokOp, "**") { + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + args.Kwarg = nameTok.Value + p.match(TokOp, ",") + break + } + if p.match(TokOp, "*") { + if p.check(TokOp, ",") || p.check(TokOp, ":") { + afterStar = true + } else { + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + args.Vararg = nameTok.Value + afterStar = true + } + if p.check(TokOp, ",") { + p.advance() + continue + } + break + } + nameTok, err := p.expect(TokName, "") + if err != nil { + return nil, err + } + var defaultVal Expr + if p.match(TokOp, "=") { + defaultVal, err = p.parseTernary() + if err != nil { + return nil, err + } + } + if afterStar { + args.KwOnly = append(args.KwOnly, nameTok.Value) + args.KwDefaults = append(args.KwDefaults, defaultVal) + } else { + args.Args = append(args.Args, nameTok.Value) + args.Defaults = append(args.Defaults, defaultVal) + } + if !p.match(TokOp, ",") { + break + } + } + return args, nil +} + +// parseYieldExpr parses a yield or yield from expression. +// Called after 'yield' keyword has been identified but NOT consumed. 
+func (p *Parser) parseYieldExpr() (Expr, error) { + pos := p.cur.Pos + p.advance() // consume 'yield' + + if p.cur.Kind == TokName && p.cur.Value == "from" { + p.advance() + val, err := p.parseExpr() + if err != nil { + return nil, err + } + return &YieldFrom{Pos: pos, Value: val}, nil + } + + if p.cur.Kind == TokNewline || p.cur.Kind == TokEOF || + p.check(TokOp, ")") || p.check(TokOp, "]") || p.check(TokOp, "}") || + p.check(TokOp, ";") || p.check(TokOp, ",") { + return &Yield{Pos: pos}, nil + } + + val, err := p.parseTestList() + if err != nil { + return nil, err + } + return &Yield{Pos: pos, Value: val}, nil +} + +// parseCallArgs parses the argument list in a function call. +func (p *Parser) parseCallArgs() ([]Expr, []*Keyword, error) { + var args []Expr + var keywords []*Keyword + + for !p.check(TokOp, ")") && p.cur.Kind != TokEOF { + p.skipNewlines() + if p.check(TokOp, ")") { + break + } + + // **kwargs + if p.match(TokOp, "**") { + val, err := p.parseExpr() + if err != nil { + return nil, nil, err + } + keywords = append(keywords, &Keyword{Arg: "", Value: val}) + if !p.match(TokOp, ",") { + break + } + continue + } + + // *args + if p.match(TokOp, "*") { + val, err := p.parseExpr() + if err != nil { + return nil, nil, err + } + args = append(args, &Starred{Pos: val.nodePos(), Value: val}) + if !p.match(TokOp, ",") { + break + } + continue + } + + // yield inside call. 
+ if p.cur.Kind == TokName && p.cur.Value == "yield" { + e, err := p.parseYieldExpr() + if err != nil { + return nil, nil, err + } + args = append(args, e) + if !p.match(TokOp, ",") { + break + } + continue + } + + expr, err := p.parseExpr() + if err != nil { + return nil, nil, err + } + + // Generator expression as sole argument: f(x*x for x in iter) + if p.cur.Kind == TokName && p.cur.Value == "for" { + gens, err := p.parseComprehensions() + if err != nil { + return nil, nil, err + } + args = append(args, &GeneratorExp{Pos: expr.nodePos(), Elt: expr, Generators: gens}) + // generator expression must be the only argument + break + } + + // Keyword argument: name=value + if p.check(TokOp, "=") { + nameExpr, ok := expr.(*NameExpr) + if !ok { + return nil, nil, p.syntaxErrorf("keyword argument must be a name") + } + p.advance() // consume '=' + val, err := p.parseExpr() + if err != nil { + return nil, nil, err + } + keywords = append(keywords, &Keyword{Arg: nameExpr.Id, Value: val}) + } else { + args = append(args, expr) + } + + if !p.match(TokOp, ",") { + break + } + } + + return args, keywords, nil +} + +// parseArgList parses a comma-separated list of expressions (for class bases, etc.). +func (p *Parser) parseArgList() ([]Expr, error) { + var exprs []Expr + for { + if p.check(TokOp, ")") || p.cur.Kind == TokEOF { + break + } + if p.match(TokOp, "**") { + val, err := p.parseExpr() + if err != nil { + return nil, err + } + exprs = append(exprs, &Starred{Pos: val.nodePos(), Value: val}) + } else if p.match(TokOp, "*") { + val, err := p.parseExpr() + if err != nil { + return nil, err + } + exprs = append(exprs, &Starred{Pos: val.nodePos(), Value: val}) + } else { + e, err := p.parseExpr() + if err != nil { + return nil, err + } + // Skip keyword args (name=value) in class bases. + if p.check(TokOp, "=") { + p.advance() + _, err = p.parseExpr() + if err != nil { + return nil, err + } + // Don't add keyword arguments to bases list. 
+ } else { + exprs = append(exprs, e) + } + } + if !p.match(TokOp, ",") { + break + } + } + return exprs, nil +} + +// parseTestList parses a comma-separated list of expressions (possibly a tuple). +func (p *Parser) parseTestList() (Expr, error) { + pos := p.cur.Pos + first, err := p.parseExpr() + if err != nil { + return nil, err + } + if !p.check(TokOp, ",") { + return first, nil + } + elts := []Expr{first} + for p.match(TokOp, ",") { + if isEndOfExprList(p.cur) { + break + } + e, err := p.parseExpr() + if err != nil { + return nil, err + } + elts = append(elts, e) + } + return &TupleExpr{Pos: pos, Elts: elts}, nil +} + +// parseTestlistStarExpr parses a comma-separated expression list that may +// include starred expressions. +func (p *Parser) parseTestlistStarExpr() (Expr, error) { + pos := p.cur.Pos + + var first Expr + var err error + if p.check(TokOp, "*") { + starPos := p.cur.Pos + p.advance() + val, err := p.parseExpr() + if err != nil { + return nil, err + } + first = &Starred{Pos: starPos, Value: val} + } else { + first, err = p.parseExpr() + if err != nil { + return nil, err + } + } + + if !p.check(TokOp, ",") { + return first, nil + } + + elts := []Expr{first} + for p.match(TokOp, ",") { + if isEndOfExprList(p.cur) { + break + } + var e Expr + if p.check(TokOp, "*") { + starPos := p.cur.Pos + p.advance() + val, err := p.parseExpr() + if err != nil { + return nil, err + } + e = &Starred{Pos: starPos, Value: val} + } else { + e, err = p.parseExpr() + if err != nil { + return nil, err + } + } + elts = append(elts, e) + } + return &TupleExpr{Pos: pos, Elts: elts}, nil +} + +// parseExprList parses a comma-separated list of expressions (for del etc.). 
+func (p *Parser) parseExprList() ([]Expr, error) { + var exprs []Expr + for { + e, err := p.parseExpr() + if err != nil { + return nil, err + } + exprs = append(exprs, e) + if !p.match(TokOp, ",") { + break + } + if isEndOfExprList(p.cur) { + break + } + } + return exprs, nil +} + +// parseTargetList parses a for-loop target (possibly a tuple). +// We use parseBitOr rather than parseExpr to avoid consuming the 'in' keyword +// that follows the target in for-loops and comprehensions. +func (p *Parser) parseTargetList() (Expr, error) { + pos := p.cur.Pos + first, err := p.parseTargetExpr() + if err != nil { + return nil, err + } + if !p.check(TokOp, ",") { + return first, nil + } + elts := []Expr{first} + for p.match(TokOp, ",") { + if p.cur.Kind == TokName && p.cur.Value == "in" { + break + } + if isEndOfExprList(p.cur) { + break + } + e, err := p.parseTargetExpr() + if err != nil { + return nil, err + } + elts = append(elts, e) + } + if len(elts) == 1 { + return elts[0], nil + } + return &TupleExpr{Pos: pos, Elts: elts}, nil +} + +// parseTargetExpr parses a single for-loop target element without consuming 'in'. +// Valid targets: names, attributes, subscripts, starred, parenthesised tuples/lists. 
+func (p *Parser) parseTargetExpr() (Expr, error) { + // Handle starred: *x + if p.check(TokOp, "*") { + pos := p.cur.Pos + p.advance() + inner, err := p.parseBitOr() + if err != nil { + return nil, err + } + return &Starred{Pos: pos, Value: inner}, nil + } + // Parenthesised or bracketed targets + if p.check(TokOp, "(") || p.check(TokOp, "[") { + open := p.cur.Value + close_ := ")" + if open == "[" { + close_ = "]" + } + pos := p.cur.Pos + p.advance() + var elts []Expr + for !p.check(TokOp, close_) && p.cur.Kind != TokEOF { + e, err := p.parseTargetExpr() + if err != nil { + return nil, err + } + elts = append(elts, e) + if !p.match(TokOp, ",") { + break + } + } + if _, err := p.expect(TokOp, close_); err != nil { + return nil, err + } + if open == "[" { + return &ListExpr{Pos: pos, Elts: elts}, nil + } + if len(elts) == 1 { + return elts[0], nil + } + return &TupleExpr{Pos: pos, Elts: elts}, nil + } + // Otherwise parse as postfix expression (name, attr, subscript) + return p.parsePostfix() +} + +// isEndOfExprList returns true if the token ends a comma-separated expression list. +func isEndOfExprList(t Token) bool { + if t.Kind == TokEOF || t.Kind == TokNewline || t.Kind == TokDedent { + return true + } + if t.Kind == TokOp { + switch t.Value { + case ")", "]", "}", ";", "=", ":": + return true + } + } + if t.Kind == TokName { + switch t.Value { + case "for", "in", "if", "else", "elif": + return true + } + } + return false +} + +// ---- Literal parsing helpers ---- + +// parseIntLiteral converts a Python integer literal string to int64 or *big.Int. +func parseIntLiteral(s string) (interface{}, error) { + // Remove underscores. + s = strings.ReplaceAll(s, "_", "") + if s == "" { + return int64(0), nil + } + + base := 10 + orig := s + if len(s) >= 2 && s[0] == '0' { + switch s[1] { + case 'x', 'X': + base = 16 + s = s[2:] + case 'o', 'O': + base = 8 + s = s[2:] + case 'b', 'B': + base = 2 + s = s[2:] + } + } + + // Try int64 first. 
// parseIntLiteral converts the text of a Python integer literal to a Go value.
//
// Underscore digit separators are removed first. A "0x"/"0o"/"0b" prefix (any
// case) selects base 16/8/2; everything else is decimal. The result is an
// int64 when the value fits and a *big.Int otherwise. An empty literal yields
// int64(0). Text that is not a valid number in the selected base returns an
// error.
func parseIntLiteral(s string) (interface{}, error) {
	lit := strings.ReplaceAll(s, "_", "")
	if lit == "" {
		return int64(0), nil
	}

	base := 10
	digits := lit
	if len(lit) >= 2 && lit[0] == '0' {
		switch lit[1] {
		case 'x', 'X':
			base, digits = 16, lit[2:]
		case 'o', 'O':
			base, digits = 8, lit[2:]
		case 'b', 'B':
			base, digits = 2, lit[2:]
		}
	}

	// Fast path: the value fits in an int64.
	if n, err := strconv.ParseInt(digits, base, 64); err == nil {
		return n, nil
	}

	// Arbitrary precision fallback. The base chosen above is passed
	// explicitly: with base 0, big.Int would read a leading-zero decimal
	// literal as octal and disagree with the int64 path.
	if bi, ok := new(big.Int).SetString(digits, base); ok {
		if bi.IsInt64() {
			return bi.Int64(), nil
		}
		return bi, nil
	}
	return nil, fmt.Errorf("cannot parse integer %q", lit)
}

// parseFloatLiteral converts the text of a Python float literal to float64.
// Underscore separators are removed, and any trailing 'j'/'J' (imaginary
// suffix) is stripped, so an imaginary literal is parsed as its real
// magnitude.
func parseFloatLiteral(s string) (float64, error) {
	s = strings.ReplaceAll(s, "_", "")
	s = strings.TrimRight(s, "jJ")
	return strconv.ParseFloat(s, 64)
}
Waiting here is safe because the evaluator checks + // ctx.Done() at each loop iteration and returns promptly. + <-ch + return 1 + } +} + +func runInternal(ctx context.Context, opts RunOpts) (exitCode int) { + // Propagate the execution context into RunOpts so that sandbox I/O calls + // (Open, Stat, ReadDir) respect the shell's cancellation deadline. + opts.Ctx = ctx + + // Wrap stdin in a single global LimitReader so that all input() calls and + // sys.stdin.read*() calls share one cumulative byte budget. Without this, + // each input() call gets a fresh 1 MiB window, allowing a script that calls + // input() in a loop to read unbounded data from /dev/zero-like sources. + if opts.Stdin != nil { + opts.Stdin = io.LimitReader(opts.Stdin, int64(maxFileReadBytes)) + // A single persistent bufio.Reader shared across all input() and + // sys.stdin.readline() calls so that read-ahead bytes are not dropped + // between calls. + opts.stdinReader = bufio.NewReader(opts.Stdin) + } + + // Parse + mod, err := Parse(opts.Source+"\n", opts.SourceName) + if err != nil { + fmt.Fprintf(opts.Stderr, " File %q\n (at parse time)\nSyntaxError: %v\n", opts.SourceName, err) + return 1 + } + + // Build globals: builtins + module-level names + globals := makeBuiltins(&opts) + globals["__name__"] = pyStr("__main__") + globals["__file__"] = pyStr(opts.SourceName) + + // Module cache + modules := map[string]*PyModule{} + + // Create evaluator; cleanup deregisters the goroutine's callObject entry. 
+ eval, cleanup := newEvaluator(ctx, &opts, globals, modules) + defer cleanup() + + // Catch sys.exit and unhandled exceptions + defer func() { + r := recover() + if r == nil { + return + } + switch sig := r.(type) { + case controlSignal: + if sig.kind == ctrlSysExit { + if code, ok := sig.value.(*PyInt); ok { + if n, ok2 := code.int64(); ok2 { + exitCode = int(n) + return + } + } + exitCode = 1 + } else { + exitCode = 1 + } + case exceptionSignal: + printTraceback(opts.Stderr, sig.exc) + exitCode = 1 + default: + // Real Go panic — re-panic + panic(r) + } + }() + + eval.exec(mod.Body) + return 0 +} diff --git a/builtins/python/python.go b/builtins/python/python.go new file mode 100644 index 00000000..60142861 --- /dev/null +++ b/builtins/python/python.go @@ -0,0 +1,205 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +// Package python implements the python builtin command. +// +// python — run Python 3 scripts or inline code +// +// Usage: python [-c code] [--help] [script | -] [arg ...] +// +// Execute Python source code using a built-in pure-Go Python 3 interpreter. +// No CPython installation is required. +// +// Input modes (mutually exclusive; first one wins): +// +// -c code +// Execute Python code given as a string. +// Example: python -c "print(1+2)" +// +// script +// Execute a Python script file. The file is opened via the +// AllowedPaths sandbox, so only files within configured allowed +// paths may be read. +// +// - (or no argument) +// Read Python code from standard input. +// +// Additional positional arguments after the script/- are passed as +// sys.argv[1:]. +// +// Accepted flags: +// +// -c code +// Program passed in as string. +// +// -h, --help +// Print usage to stdout and exit 0. 
+// +// Security restrictions: +// +// - os.system(), os.popen() and all OS process-spawning functions are +// absent from the os module. Calling them raises AttributeError. +// - File-system mutation functions (os.remove, os.mkdir, os.makedirs, +// os.rmdir, os.removedirs, os.rename, os.link, os.symlink, etc.) are +// absent. +// - The built-in open() is replaced with a read-only version that routes +// through the shell's AllowedPaths sandbox. Write/append modes raise +// PermissionError. +// - tempfile, glob, subprocess, socket, ctypes raise ImportError when +// imported. +// +// Supported stdlib modules: math, string, sys, os (read-only), binascii, +// json (dumps only; loads raises ValueError), collections (OrderedDict, +// Counter, deque, defaultdict — but defaultdict default_factory raises +// NotImplementedError if provided). +// Blocked modules: subprocess, socket, ctypes, tempfile, glob, threading, +// multiprocessing, asyncio, re. +// +// Known limitations: +// +// - `import os.path` raises ImportError; use `from os import path` instead. +// - goroutineID() parses runtime.Stack output which is undocumented; if the +// format changes in a future Go release the evaluator degrades gracefully +// (callbacks go through a no-op context) but may panic on some code paths. +// +// Exit codes: +// +// 0 Python code ran successfully (or sys.exit(0)). +// N sys.exit(N) was called with integer N. +// 1 An unhandled Python exception occurred, a file could not be opened, +// or the code string / script was empty. +// +// Memory safety: +// +// Script files and stdin input are read through bounded buffers capped +// at 1 MiB. open().read() calls inside Python scripts are also bounded +// at 1 MiB per call to prevent memory exhaustion. All context- +// cancellation signals are respected; if the shell's execution timeout +// fires Python is abandoned. 
+package python + +import ( + "context" + "io" + "os" + + "github.com/DataDog/rshell/builtins" +) + +// Cmd is the python builtin command descriptor. +var Cmd = builtins.Command{ + Name: "python", + Description: "run Python 3 scripts or inline code", + MakeFlags: registerFlags, +} + +// maxSourceBytes is the maximum size of a script read from a file or stdin. +const maxSourceBytes = 1 << 20 // 1 MiB + +func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { + help := fs.BoolP("help", "h", false, "print usage and exit") + code := fs.StringP("cmd", "c", "", "program passed in as string") + + return func(ctx context.Context, callCtx *builtins.CallContext, args []string) builtins.Result { + if *help { + callCtx.Out("Usage: python [-c code] [-h] [script | -] [arg ...]\n\n") + callCtx.Out("Run Python 3 source code (built-in pure-Go interpreter).\n\n") + fs.SetOutput(callCtx.Stdout) + fs.PrintDefaults() + callCtx.Out("\nSecurity restrictions: os.system/write/delete blocked; open() is read-only.\n") + callCtx.Out("Stdlib: math, string, sys, os (read-only), binascii, json (dumps only), collections.\n") + callCtx.Out("Blocked modules (raise ImportError): subprocess, socket, ctypes, tempfile, glob, threading, multiprocessing, asyncio, re.\n") + return builtins.Result{} + } + + // Determine source and source name. + var ( + source string + sourceName string + extraArgs []string + ) + + if fs.Changed("cmd") { + // -c mode: source is the flag value; args are extra argv. + source = *code + sourceName = "" + extraArgs = args + } else if len(args) == 0 || args[0] == "-" { + // Stdin mode. + sourceName = "" + if len(args) > 0 { + extraArgs = args[1:] + } + if callCtx.Stdin == nil { + callCtx.Errf("python: no stdin available\n") + return builtins.Result{Code: 1} + } + src, err := readBounded(callCtx.Stdin, maxSourceBytes) + if err != nil { + callCtx.Errf("python: reading stdin: %v\n", err) + return builtins.Result{Code: 1} + } + source = src + } else { + // File mode. 
+ scriptPath := args[0] + extraArgs = args[1:] + sourceName = scriptPath + + f, err := callCtx.OpenFile(ctx, scriptPath, os.O_RDONLY, 0) + if err != nil { + callCtx.Errf("python: can't open file '%s': %v\n", scriptPath, callCtx.PortableErr(err)) + return builtins.Result{Code: 1} + } + defer f.Close() + + src, err := readBounded(f, maxSourceBytes) + if err != nil { + callCtx.Errf("python: reading '%s': %v\n", scriptPath, err) + return builtins.Result{Code: 1} + } + source = src + } + + exitCode := Run(ctx, RunOpts{ + Source: source, + SourceName: sourceName, + Stdin: callCtx.Stdin, + Stdout: callCtx.Stdout, + Stderr: callCtx.Stderr, + Open: callCtx.OpenFile, + Stat: callCtx.StatFile, + ReadDir: callCtx.ReadDir, + Args: extraArgs, + }) + + if exitCode != 0 { + // Exit codes > 255 are truncated to uint8 (POSIX behaviour: exit codes + // are mod 256, matching CPython's behaviour on Linux/macOS). + return builtins.Result{Code: uint8(exitCode)} + } + return builtins.Result{} + } +} + +// readBounded reads at most maxBytes from r and returns the contents as a string. +// Returns an error if the source exceeds the limit. +func readBounded(r io.Reader, maxBytes int64) (string, error) { + limited := io.LimitReader(r, maxBytes+1) + data, err := io.ReadAll(limited) + if err != nil { + return "", err + } + if int64(len(data)) > maxBytes { + return "", &sourceTooBigError{limit: maxBytes} + } + return string(data), nil +} + +type sourceTooBigError struct{ limit int64 } + +func (e *sourceTooBigError) Error() string { + return "source code exceeds maximum size limit" +} diff --git a/builtins/python/smoke_test.go b/builtins/python/smoke_test.go new file mode 100644 index 00000000..64eafa57 --- /dev/null +++ b/builtins/python/smoke_test.go @@ -0,0 +1,92 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). 
+// Copyright 2026-present Datadog, Inc. + +package python + +import ( + "bytes" + "context" + "fmt" + "io" + "io/fs" + "os" + "testing" +) + +func noFileOpen(ctx context.Context, path string, flags int, mode os.FileMode) (io.ReadWriteCloser, error) { + return nil, fmt.Errorf("no file access") +} + +func noStat(_ context.Context, _ string) (fs.FileInfo, error) { + return nil, fmt.Errorf("no file access") +} + +func noReadDir(_ context.Context, _ string) ([]fs.DirEntry, error) { + return nil, fmt.Errorf("no file access") +} + +func TestSmokeEval(t *testing.T) { + tests := []struct { + name string + code string + expect string + }{ + {"hello", `print("hello world")`, "hello world\n"}, + {"arithmetic", `print(2 + 3 * 4)`, "14\n"}, + {"list comp", `print([x*2 for x in range(5)])`, "[0, 2, 4, 6, 8]\n"}, + {"fib", ` +def fib(n): + if n <= 1: + return n + return fib(n-1) + fib(n-2) +print(fib(10))`, "55\n"}, + {"class", ` +class Dog: + def __init__(self, name): + self.name = name + def bark(self): + return "Woof! " + self.name + +d = Dog("Rex") +print(d.bark())`, "Woof! 
Rex\n"}, + {"generator", ` +def gen(): + for i in range(3): + yield i * i +print(list(gen()))`, "[0, 1, 4]\n"}, + {"exception", ` +try: + raise ValueError("oops") +except ValueError as e: + print("caught:", e)`, "caught: oops\n"}, + {"closure", ` +def make_adder(n): + def adder(x): + return x + n + return adder +add5 = make_adder(5) +print(add5(3))`, "8\n"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var buf bytes.Buffer + var ebuf bytes.Buffer + code := Run(context.Background(), RunOpts{ + Source: tt.code, + SourceName: "", + Stdout: &buf, + Stderr: &ebuf, + Open: noFileOpen, + Stat: noStat, + ReadDir: noReadDir, + }) + got := buf.String() + if code != 0 || got != tt.expect { + t.Errorf("code=%d, got=%q, want=%q, stderr=%q", code, got, tt.expect, ebuf.String()) + } + }) + } +} diff --git a/builtins/python/types.go b/builtins/python/types.go new file mode 100644 index 00000000..ed4a8add --- /dev/null +++ b/builtins/python/types.go @@ -0,0 +1,3386 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package python + +import ( + "bufio" + "context" + "fmt" + "io" + "io/fs" + "math/big" + "os" + "runtime" + "strconv" + "strings" + "sync" + "unicode/utf8" +) + +// RunOpts configures a single Python execution. +type RunOpts struct { + // Source is the Python source code to execute. + Source string + + // SourceName is the name shown in tracebacks (e.g. "", "script.py"). + SourceName string + + // Ctx is the execution context. Sandbox I/O calls (Open, Stat, ReadDir) use + // this context so they respect the shell's cancellation deadline. Set by + // runInternal; callers should leave it nil (it will be populated automatically). + Ctx context.Context + + // Stdin is Python's sys.stdin reader. If nil, stdin returns EOF immediately. 
+ Stdin io.Reader + + // Stdout receives all output from Python print() statements. + Stdout io.Writer + + // Stderr receives Python tracebacks and error messages. + Stderr io.Writer + + // Open opens a file for reading within the shell's AllowedPaths sandbox. + Open func(ctx context.Context, path string, flags int, mode os.FileMode) (io.ReadWriteCloser, error) + + // Stat returns file metadata within the shell's AllowedPaths sandbox (follows symlinks). + Stat func(ctx context.Context, path string) (fs.FileInfo, error) + + // ReadDir lists a directory within the shell's AllowedPaths sandbox. + ReadDir func(ctx context.Context, path string) ([]fs.DirEntry, error) + + // Args are additional arguments appended to sys.argv after SourceName. + Args []string + + // stdinReader is a single persistent bufio.Reader wrapping Stdin, shared + // across all input() calls so that read-ahead bytes are not lost between calls. + // Initialised by runInternal once Stdin has been wrapped in its LimitReader. + stdinReader *bufio.Reader +} + +// ---- Control flow signals ---- + +// controlKind identifies the kind of non-exception control signal. +type controlKind int + +const ( + ctrlReturn controlKind = iota + ctrlBreak + ctrlContinue + ctrlSysExit + ctrlGeneratorExit +) + +// controlSignal is panicked for return/break/continue/sys.exit. +type controlSignal struct { + kind controlKind + value Object // return value or sys.exit code +} + +// exceptionSignal is panicked for Python exceptions. +type exceptionSignal struct { + exc *PyException +} + +// ---- Object interface ---- + +// Object is the universal Python value. +type Object interface { + pyType() *PyType + pyRepr() string + pyStr() string +} + +// ---- PyType ---- + +// PyType represents a Python type object. 
type PyType struct {
	Name  string    // Python-visible type name, e.g. "int"
	Bases []*PyType // for isinstance checks on built-in types
}

// pyType returns the metatype: every type object is an instance of "type".
func (t *PyType) pyType() *PyType { return typeType }

// pyRepr renders the type the way CPython does, e.g. <class 'int'>.
func (t *PyType) pyRepr() string { return "<class '" + t.Name + "'>" }

// pyStr is identical to pyRepr for type objects.
func (t *PyType) pyStr() string { return t.pyRepr() }

// Built-in type objects.
var (
	typeType          = &PyType{Name: "type"}
	typeNone          = &PyType{Name: "NoneType"}
	typeBool          = &PyType{Name: "bool"}
	typeInt           = &PyType{Name: "int"}
	typeFloat         = &PyType{Name: "float"}
	typeStr           = &PyType{Name: "str"}
	typeBytes         = &PyType{Name: "bytes"}
	typeList          = &PyType{Name: "list"}
	typeTuple         = &PyType{Name: "tuple"}
	typeDict          = &PyType{Name: "dict"}
	typeSet           = &PyType{Name: "set"}
	typeFrozenSet     = &PyType{Name: "frozenset"}
	typeFunction      = &PyType{Name: "function"}
	typeBuiltin       = &PyType{Name: "builtin_function_or_method"}
	typeModule        = &PyType{Name: "module"}
	typeRange         = &PyType{Name: "range"}
	typeSlice         = &PyType{Name: "slice"}
	typeClass         = &PyType{Name: "type"} // user-defined class type
	typeBoundMethod   = &PyType{Name: "method"}
	typeGenerator     = &PyType{Name: "generator"}
	typeMapIter       = &PyType{Name: "map"}
	typeFilterIter    = &PyType{Name: "filter"}
	typeZipIter       = &PyType{Name: "zip"}
	typeEnumerateIter = &PyType{Name: "enumerate"}
	typeReversedIter  = &PyType{Name: "list_reverseiterator"}
	typeFile          = &PyType{Name: "TextIOWrapper"}
)

// ---- Singletons ----

var (
	pyNone  = &PyNone{}
	pyTrue  = &PyBool{v: true}
	pyFalse = &PyBool{v: false}
)

// PyNone is the Python None singleton.
type PyNone struct{}

func (n *PyNone) pyType() *PyType { return typeNone }
func (n *PyNone) pyRepr() string  { return "None" }
func (n *PyNone) pyStr() string   { return "None" }

// PyBool is the Python bool type.
type PyBool struct{ v bool }

func (b *PyBool) pyType() *PyType { return typeBool }

// pyRepr returns "True" or "False".
func (b *PyBool) pyRepr() string {
	if b.v {
		return "True"
	}
	return "False"
}

func (b *PyBool) pyStr() string { return b.pyRepr() }

// pyBool maps a Go bool onto the shared pyTrue / pyFalse singletons.
func pyBool(v bool) *PyBool {
	if v {
		return pyTrue
	}
	return pyFalse
}

// ---- PyInt ----

// Small int cache (-5 to 256)
var smallInts [262]*PyInt

func init() {
	for i := range smallInts {
		smallInts[i] = &PyInt{small: int64(i - 5)}
	}
}

// PyInt is the Python int type, backed by int64 or *big.Int for large values.
type PyInt struct {
	small int64    // used when big == nil
	big   *big.Int // non-nil for large values
}

// pyInt returns the PyInt for n, reusing the cached object for -5..256.
func pyInt(n int64) *PyInt {
	if n >= -5 && n <= 256 {
		return smallInts[n+5]
	}
	return &PyInt{small: n}
}

// pyIntBig wraps n, collapsing to the int64 representation when it fits.
// The big value is copied, so later mutation of n does not affect the result.
func pyIntBig(n *big.Int) *PyInt {
	if n.IsInt64() {
		return pyInt(n.Int64())
	}
	return &PyInt{big: new(big.Int).Set(n)}
}

// int64 returns the value as an int64; the bool is false when it does not fit.
func (i *PyInt) int64() (int64, bool) {
	if i.big == nil {
		return i.small, true
	}
	if i.big.IsInt64() {
		return i.big.Int64(), true
	}
	return 0, false
}

// toBigInt returns a freshly allocated *big.Int holding the value.
func (i *PyInt) toBigInt() *big.Int {
	if i.big != nil {
		return new(big.Int).Set(i.big)
	}
	return big.NewInt(i.small)
}

func (i *PyInt) pyType() *PyType { return typeInt }

// pyRepr returns the base-10 representation.
func (i *PyInt) pyRepr() string {
	if i.big != nil {
		return i.big.String()
	}
	return strconv.FormatInt(i.small, 10)
}

func (i *PyInt) pyStr() string { return i.pyRepr() }

// ---- PyFloat ----

// PyFloat is the Python float type.
type PyFloat struct{ v float64 }

func pyFloat(v float64) *PyFloat   { return &PyFloat{v: v} }
func (f *PyFloat) pyType() *PyType { return typeFloat }

// pyRepr mirrors CPython's float repr: the shortest decimal string that
// round-trips, written in exponent form only when the decimal exponent is
// below -4 or at least 16 ("1e-05", "1e+16"), otherwise in positional form
// with a guaranteed fractional part ("1000000.0"). Non-finite values are
// spelled "nan", "inf" and "-inf".
func (f *PyFloat) pyRepr() string {
	sci := strconv.FormatFloat(f.v, 'e', -1, 64)
	switch sci {
	case "NaN":
		return "nan"
	case "+Inf":
		return "inf"
	case "-Inf":
		return "-inf"
	}
	// Go's own 'g' format switches to exponent form at 1e21 (or at 1e6 for
	// short mantissas), so the threshold is applied by hand here.
	if ePos := strings.LastIndexByte(sci, 'e'); ePos >= 0 {
		if exp, err := strconv.Atoi(sci[ePos+1:]); err == nil && (exp < -4 || exp >= 16) {
			return sci
		}
	}
	fixed := strconv.FormatFloat(f.v, 'f', -1, 64)
	if !strings.Contains(fixed, ".") {
		fixed += ".0"
	}
	return fixed
}

func (f *PyFloat) pyStr() string { return f.pyRepr() }

// ---- PyStr ----

// PyStr is the Python str type.
type PyStr struct{ v string }

func pyStr(s string) *PyStr      { return &PyStr{v: s} }
func (s *PyStr) pyType() *PyType { return typeStr }

// pyRepr returns the value as a single-quoted literal with quotes,
// backslashes and control characters escaped.
func (s *PyStr) pyRepr() string {
	var b strings.Builder
	b.WriteByte('\'')
	for _, r := range s.v {
		switch r {
		case '\'':
			b.WriteString("\\'")
		case '\\':
			b.WriteString("\\\\")
		case '\n':
			b.WriteString("\\n")
		case '\r':
			b.WriteString("\\r")
		case '\t':
			b.WriteString("\\t")
		default:
			if r < 32 || r == 127 {
				fmt.Fprintf(&b, "\\x%02x", r)
			} else {
				b.WriteRune(r)
			}
		}
	}
	b.WriteByte('\'')
	return b.String()
}

func (s *PyStr) pyStr() string { return s.v }

// strGetAttr returns a bound method builtin for string attribute access.
+func strGetAttr(s *PyStr, name string) (Object, bool) { + switch name { + case "upper": + return makeBuiltin("upper", func(args []Object, kwargs map[string]Object) Object { + return pyStr(strings.ToUpper(s.v)) + }), true + case "lower": + return makeBuiltin("lower", func(args []Object, kwargs map[string]Object) Object { + return pyStr(strings.ToLower(s.v)) + }), true + case "strip": + return makeBuiltin("strip", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 || args[0] == pyNone { + return pyStr(strings.TrimSpace(s.v)) + } + chars := mustStr(args[0], "strip") + return pyStr(strings.Trim(s.v, chars)) + }), true + case "lstrip": + return makeBuiltin("lstrip", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 || args[0] == pyNone { + return pyStr(strings.TrimLeftFunc(s.v, func(r rune) bool { return strings.ContainsRune(" \t\n\r\x0b\x0c", r) })) + } + chars := mustStr(args[0], "lstrip") + return pyStr(strings.TrimLeft(s.v, chars)) + }), true + case "rstrip": + return makeBuiltin("rstrip", func(args []Object, kwargs map[string]Object) Object { + if len(args) == 0 || args[0] == pyNone { + return pyStr(strings.TrimRightFunc(s.v, func(r rune) bool { return strings.ContainsRune(" \t\n\r\x0b\x0c", r) })) + } + chars := mustStr(args[0], "rstrip") + return pyStr(strings.TrimRight(s.v, chars)) + }), true + case "split": + return makeBuiltin("split", func(args []Object, kwargs map[string]Object) Object { + sep := "" + maxsplit := -1 + if len(args) > 0 && args[0] != pyNone { + sep = mustStr(args[0], "split") + } + if len(args) > 1 { + if n, ok := args[1].(*PyInt); ok { + if v, ok2 := n.int64(); ok2 { + maxsplit = int(v) + } + } + } + var parts []string + if sep == "" { + // Split on whitespace, removing empty strings + fields := strings.Fields(s.v) + if maxsplit >= 0 && len(fields) > maxsplit+1 { + // rejoin the rest + parts = fields[:maxsplit] + rest := strings.Join(fields[maxsplit:], " ") + parts = append(parts, rest) + } 
else { + parts = fields + } + } else { + if maxsplit < 0 { + parts = strings.Split(s.v, sep) + } else { + parts = strings.SplitN(s.v, sep, maxsplit+1) + } + } + items := make([]Object, len(parts)) + for i, p := range parts { + items[i] = pyStr(p) + } + return pyList(items) + }), true + case "rsplit": + return makeBuiltin("rsplit", func(args []Object, kwargs map[string]Object) Object { + sep := "" + maxsplit := -1 + if len(args) > 0 && args[0] != pyNone { + sep = mustStr(args[0], "rsplit") + } + if len(args) > 1 { + if n, ok := args[1].(*PyInt); ok { + if v, ok2 := n.int64(); ok2 { + maxsplit = int(v) + } + } + } + var parts []string + if sep == "" { + fields := strings.Fields(s.v) + if maxsplit >= 0 && len(fields) > maxsplit+1 { + split := len(fields) - maxsplit + rest := strings.Join(fields[:split], " ") + parts = append([]string{rest}, fields[split:]...) + } else { + parts = fields + } + } else { + if maxsplit < 0 { + parts = strings.Split(s.v, sep) + } else { + // SplitN from right + parts = strRSplitN(s.v, sep, maxsplit+1) + } + } + items := make([]Object, len(parts)) + for i, p := range parts { + items[i] = pyStr(p) + } + return pyList(items) + }), true + case "join": + return makeBuiltin("join", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("join() takes exactly 1 argument") + } + items := iterToStrings(args[0], "join") + return pyStr(strings.Join(items, s.v)) + }), true + case "startswith": + return makeBuiltin("startswith", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("startswith() requires at least 1 argument") + } + prefix := mustStr(args[0], "startswith") + return pyBool(strings.HasPrefix(s.v, prefix)) + }), true + case "endswith": + return makeBuiltin("endswith", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("endswith() requires at least 1 argument") + } + suffix := mustStr(args[0], "endswith") + return 
pyBool(strings.HasSuffix(s.v, suffix)) + }), true + case "replace": + return makeBuiltin("replace", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 2 { + raiseTypeError("replace() requires at least 2 arguments") + } + old := mustStr(args[0], "replace") + new_ := mustStr(args[1], "replace") + n := -1 + if len(args) > 2 { + if v, ok := args[2].(*PyInt); ok { + if i, ok2 := v.int64(); ok2 { + n = int(i) + } + } + } + return pyStr(strings.Replace(s.v, old, new_, n)) + }), true + case "find": + return makeBuiltin("find", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("find() requires at least 1 argument") + } + sub := mustStr(args[0], "find") + idx := strings.Index(s.v, sub) + return pyInt(int64(idx)) + }), true + case "rfind": + return makeBuiltin("rfind", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("rfind() requires at least 1 argument") + } + sub := mustStr(args[0], "rfind") + idx := strings.LastIndex(s.v, sub) + return pyInt(int64(idx)) + }), true + case "index": + return makeBuiltin("index", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("index() requires at least 1 argument") + } + sub := mustStr(args[0], "index") + idx := strings.Index(s.v, sub) + if idx < 0 { + raiseValueError("substring not found") + } + return pyInt(int64(idx)) + }), true + case "rindex": + return makeBuiltin("rindex", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("rindex() requires at least 1 argument") + } + sub := mustStr(args[0], "rindex") + idx := strings.LastIndex(s.v, sub) + if idx < 0 { + raiseValueError("substring not found") + } + return pyInt(int64(idx)) + }), true + case "count": + return makeBuiltin("count", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("count() requires at least 1 argument") + } + sub := mustStr(args[0], 
"count") + return pyInt(int64(strings.Count(s.v, sub))) + }), true + case "encode": + return makeBuiltin("encode", func(args []Object, kwargs map[string]Object) Object { + // Default: UTF-8 + return pyBytes([]byte(s.v)) + }), true + case "isdigit": + return makeBuiltin("isdigit", func(args []Object, kwargs map[string]Object) Object { + if len(s.v) == 0 { + return pyFalse + } + for _, r := range s.v { + if r < '0' || r > '9' { + return pyFalse + } + } + return pyTrue + }), true + case "isalpha": + return makeBuiltin("isalpha", func(args []Object, kwargs map[string]Object) Object { + if len(s.v) == 0 { + return pyFalse + } + for _, r := range s.v { + if !((r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z')) { + return pyFalse + } + } + return pyTrue + }), true + case "isalnum": + return makeBuiltin("isalnum", func(args []Object, kwargs map[string]Object) Object { + if len(s.v) == 0 { + return pyFalse + } + for _, r := range s.v { + if !((r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9')) { + return pyFalse + } + } + return pyTrue + }), true + case "isspace": + return makeBuiltin("isspace", func(args []Object, kwargs map[string]Object) Object { + if len(s.v) == 0 { + return pyFalse + } + for _, r := range s.v { + if !strings.ContainsRune(" \t\n\r\x0b\x0c", r) { + return pyFalse + } + } + return pyTrue + }), true + case "isupper": + return makeBuiltin("isupper", func(args []Object, kwargs map[string]Object) Object { + if len(s.v) == 0 { + return pyFalse + } + hasUpper := false + for _, r := range s.v { + if r >= 'a' && r <= 'z' { + return pyFalse + } + if r >= 'A' && r <= 'Z' { + hasUpper = true + } + } + return pyBool(hasUpper) + }), true + case "islower": + return makeBuiltin("islower", func(args []Object, kwargs map[string]Object) Object { + if len(s.v) == 0 { + return pyFalse + } + hasLower := false + for _, r := range s.v { + if r >= 'A' && r <= 'Z' { + return pyFalse + } + if r >= 'a' && r <= 'z' { + hasLower = true + } + } + return 
pyBool(hasLower) + }), true + case "zfill": + return makeBuiltin("zfill", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("zfill() requires 1 argument") + } + w := int(toIntVal(args[0])) + return pyStr(strZfill(s.v, w)) + }), true + case "center": + return makeBuiltin("center", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("center() requires 1 argument") + } + w := int(toIntVal(args[0])) + fill := " " + if len(args) > 1 { + fill = mustStr(args[1], "center") + } + return pyStr(strCenter(s.v, w, fill)) + }), true + case "ljust": + return makeBuiltin("ljust", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("ljust() requires 1 argument") + } + w := int(toIntVal(args[0])) + fill := " " + if len(args) > 1 { + fill = mustStr(args[1], "ljust") + } + return pyStr(strLjust(s.v, w, fill)) + }), true + case "rjust": + return makeBuiltin("rjust", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("rjust() requires 1 argument") + } + w := int(toIntVal(args[0])) + fill := " " + if len(args) > 1 { + fill = mustStr(args[1], "rjust") + } + return pyStr(strRjust(s.v, w, fill)) + }), true + case "title": + return makeBuiltin("title", func(args []Object, kwargs map[string]Object) Object { + return pyStr(strings.Title(s.v)) //nolint:staticcheck + }), true + case "capitalize": + return makeBuiltin("capitalize", func(args []Object, kwargs map[string]Object) Object { + if len(s.v) == 0 { + return pyStr("") + } + return pyStr(strings.ToUpper(s.v[:1]) + strings.ToLower(s.v[1:])) + }), true + case "format": + return makeBuiltin("format", func(args []Object, kwargs map[string]Object) Object { + return pyStr(strFormat(s.v, args, kwargs)) + }), true + case "format_map": + return makeBuiltin("format_map", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("format_map() requires 
exactly 1 argument") + } + d, ok := args[0].(*PyDict) + if !ok { + raiseTypeError("format_map() argument must be a dict") + } + mapping := make(map[string]Object) + for i, k := range d.keys { + if ks, ok2 := k.(*PyStr); ok2 { + mapping[ks.v] = d.vals[i] + } + } + return pyStr(strFormat(s.v, nil, mapping)) + }), true + case "expandtabs": + return makeBuiltin("expandtabs", func(args []Object, kwargs map[string]Object) Object { + tabsize := 8 + if len(args) > 0 { + tabsize = int(toIntVal(args[0])) + } + return pyStr(strings.ReplaceAll(s.v, "\t", strings.Repeat(" ", tabsize))) + }), true + case "splitlines": + return makeBuiltin("splitlines", func(args []Object, kwargs map[string]Object) Object { + keepends := false + if len(args) > 0 { + keepends = pyTruth(args[0]) + } + lines := splitlines(s.v, keepends) + items := make([]Object, len(lines)) + for i, l := range lines { + items[i] = pyStr(l) + } + return pyList(items) + }), true + case "partition": + return makeBuiltin("partition", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("partition() requires 1 argument") + } + sep := mustStr(args[0], "partition") + idx := strings.Index(s.v, sep) + if idx < 0 { + return pyTuple([]Object{pyStr(s.v), pyStr(""), pyStr("")}) + } + return pyTuple([]Object{pyStr(s.v[:idx]), pyStr(sep), pyStr(s.v[idx+len(sep):])}) + }), true + case "rpartition": + return makeBuiltin("rpartition", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("rpartition() requires 1 argument") + } + sep := mustStr(args[0], "rpartition") + idx := strings.LastIndex(s.v, sep) + if idx < 0 { + return pyTuple([]Object{pyStr(""), pyStr(""), pyStr(s.v)}) + } + return pyTuple([]Object{pyStr(s.v[:idx]), pyStr(sep), pyStr(s.v[idx+len(sep):])}) + }), true + case "translate": + return makeBuiltin("translate", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("translate() requires 1 argument") + } 
+ // table is a dict mapping ordinals to ordinals/strings/None + table, ok := args[0].(*PyDict) + if !ok { + raiseTypeError("translate() argument must be a dict") + } + var b strings.Builder + for _, r := range s.v { + key := pyInt(int64(r)) + k, _ := hashKey(key) + if idx, found := table.index[k]; found { + v := table.vals[idx] + if v == pyNone { + // delete + } else if vs, ok2 := v.(*PyStr); ok2 { + b.WriteString(vs.v) + } else if vi, ok2 := v.(*PyInt); ok2 { + if n, ok3 := vi.int64(); ok3 { + b.WriteRune(rune(n)) + } + } + } else { + b.WriteRune(r) + } + } + return pyStr(b.String()) + }), true + } + return nil, false +} + +// iterToStrings collects strings from an iterable for join(). +func iterToStrings(obj Object, fnName string) []string { + items := collectIterable(obj) + result := make([]string, len(items)) + for i, item := range items { + s, ok := item.(*PyStr) + if !ok { + raiseTypeError("sequence item %d: expected str instance, %s found", i, item.pyType().Name) + } + result[i] = s.v + } + return result +} + +// strRSplitN splits s by sep from the right, at most n parts from the right. +func strRSplitN(s, sep string, n int) []string { + if n == 1 { + return []string{s} + } + parts := []string{} + for len(parts) < n-1 { + idx := strings.LastIndex(s, sep) + if idx < 0 { + break + } + parts = append([]string{s[idx+len(sep):]}, parts...) + s = s[:idx] + } + return append([]string{s}, parts...) +} + +// splitlines splits a string by line endings. 
func splitlines(s string, keepends bool) []string {
	// Line boundaries recognised by Python's str.splitlines(). The non-ASCII
	// ones are written as code points: a raw "\x85" byte is not valid UTF-8
	// and would make IndexAny match malformed input instead of U+0085.
	const lineBreaks = "\n\r\x0b\x0c\x1c\x1d\x1e\u0085\u2028\u2029"

	var lines []string
	for len(s) > 0 {
		idx := strings.IndexAny(s, lineBreaks)
		if idx < 0 {
			lines = append(lines, s)
			break
		}
		// Step over the whole terminator; some of them are multi-byte.
		_, width := utf8.DecodeRuneInString(s[idx:])
		end := idx + width
		if s[idx] == '\r' && end < len(s) && s[end] == '\n' {
			end++ // "\r\n" counts as a single line break
		}
		if keepends {
			lines = append(lines, s[:end])
		} else {
			lines = append(lines, s[:idx])
		}
		s = s[end:]
	}
	return lines
}

// strPadding returns exactly n runes of padding. A single-rune fill (the only
// form Python accepts) is repeated; a longer fill is cycled rune by rune; an
// empty fill yields no padding.
func strPadding(fill string, n int) string {
	if n <= 0 || fill == "" {
		return ""
	}
	if utf8.RuneCountInString(fill) == 1 {
		return strings.Repeat(fill, n)
	}
	runes := []rune(fill)
	var b strings.Builder
	for i := 0; i < n; i++ {
		b.WriteRune(runes[i%len(runes)])
	}
	return b.String()
}

// strZfill left-pads s with '0' to a width of w runes, keeping a leading
// '+' or '-' in front of the zeros.
func strZfill(s string, w int) string {
	pad := w - utf8.RuneCountInString(s)
	if pad <= 0 {
		return s
	}
	sign := ""
	if s != "" && (s[0] == '+' || s[0] == '-') {
		sign, s = s[:1], s[1:]
	}
	return sign + strings.Repeat("0", pad) + s
}

// strCenter centres s in a field of w runes. When the padding cannot be
// split evenly the extra cell goes where CPython puts it: on the left when
// both the padding and the width are odd, otherwise on the right.
func strCenter(s string, w int, fill string) string {
	pad := w - utf8.RuneCountInString(s)
	if pad <= 0 || fill == "" {
		return s
	}
	left := pad/2 + (pad & w & 1)
	return strPadding(fill, left) + s + strPadding(fill, pad-left)
}

// strLjust left-justifies s in a field of w runes.
func strLjust(s string, w int, fill string) string {
	return s + strPadding(fill, w-utf8.RuneCountInString(s))
}

// strRjust right-justifies s in a field of w runes.
func strRjust(s string, w int, fill string) string {
	return strPadding(fill, w-utf8.RuneCountInString(s)) + s
}

// strFormat implements str.format().
+func strFormat(tmpl string, args []Object, kwargs map[string]Object) string { + var b strings.Builder + autoIdx := 0 + i := 0 + for i < len(tmpl) { + if tmpl[i] == '{' { + if i+1 < len(tmpl) && tmpl[i+1] == '{' { + b.WriteByte('{') + i += 2 + continue + } + end := strings.Index(tmpl[i:], "}") + if end < 0 { + b.WriteByte('{') + i++ + continue + } + field := tmpl[i+1 : i+end] + i += end + 1 + + // Parse field: [field_name][!conversion][:format_spec] + conv := "" + spec := "" + if ci := strings.Index(field, "!"); ci >= 0 { + conv = field[ci+1:] + field = field[:ci] + if ci2 := strings.Index(conv, ":"); ci2 >= 0 { + spec = conv[ci2+1:] + conv = conv[:ci2] + } + } else if ci := strings.Index(field, ":"); ci >= 0 { + spec = field[ci+1:] + field = field[:ci] + } + + var val Object + if field == "" { + // Auto-numbered + if autoIdx < len(args) { + val = args[autoIdx] + } else { + val = pyNone + } + autoIdx++ + } else if n, err := strconv.Atoi(field); err == nil { + if n < len(args) { + val = args[n] + } else { + val = pyNone + } + } else { + // Named + if kwargs != nil { + val = kwargs[field] + } + if val == nil { + val = pyNone + } + } + + // Apply conversion + var s string + switch conv { + case "r": + s = val.pyRepr() + case "s": + s = val.pyStr() + case "a": + s = val.pyRepr() // simplified + default: + s = val.pyStr() + } + + // Apply format spec + if spec != "" { + s = applyFormatSpec(s, val, spec) + } + b.WriteString(s) + } else if tmpl[i] == '}' && i+1 < len(tmpl) && tmpl[i+1] == '}' { + b.WriteByte('}') + i += 2 + } else { + b.WriteByte(tmpl[i]) + i++ + } + } + return b.String() +} + +func applyFormatSpec(s string, val Object, spec string) string { + if spec == "" { + return s + } + // Very simple format spec: just handle d, f, s, r, x, o, b, e, g + switch spec[len(spec)-1] { + case 'd': + if n, ok := val.(*PyInt); ok { + v, _ := n.int64() + s = strconv.FormatInt(v, 10) + } + case 'f': + var f float64 + switch v := val.(type) { + case *PyFloat: + f = v.v + case 
*PyInt: + if n, ok := v.int64(); ok { + f = float64(n) + } + } + prec := 6 + if len(spec) > 1 { + if dotIdx := strings.Index(spec, "."); dotIdx >= 0 { + p, err := strconv.Atoi(spec[dotIdx+1 : len(spec)-1]) + if err == nil { + prec = p + } + } + } + s = strconv.FormatFloat(f, 'f', prec, 64) + case 'x': + if n, ok := val.(*PyInt); ok { + v, _ := n.int64() + s = strconv.FormatInt(v, 16) + } + case 'o': + if n, ok := val.(*PyInt); ok { + v, _ := n.int64() + s = strconv.FormatInt(v, 8) + } + case 'b': + if n, ok := val.(*PyInt); ok { + v, _ := n.int64() + s = strconv.FormatInt(v, 2) + } + } + return s +} + +// strPercent implements % formatting. +func strPercent(tmpl string, args Object) string { + // Collect args into a slice + var argList []Object + switch v := args.(type) { + case *PyTuple: + argList = v.items + default: + argList = []Object{args} + } + + var b strings.Builder + argIdx := 0 + i := 0 + for i < len(tmpl) { + if tmpl[i] != '%' { + b.WriteByte(tmpl[i]) + i++ + continue + } + i++ + if i >= len(tmpl) { + break + } + if tmpl[i] == '%' { + b.WriteByte('%') + i++ + continue + } + // Parse optional flags + for i < len(tmpl) && (tmpl[i] == '-' || tmpl[i] == '+' || tmpl[i] == ' ' || tmpl[i] == '0' || tmpl[i] == '#') { + i++ + } + // Parse optional width (integer) + for i < len(tmpl) && tmpl[i] >= '0' && tmpl[i] <= '9' { + i++ + } + // Parse optional precision: .digits + prec := -1 + if i < len(tmpl) && tmpl[i] == '.' { + i++ + prec = 0 + for i < len(tmpl) && tmpl[i] >= '0' && tmpl[i] <= '9' { + prec = prec*10 + int(tmpl[i]-'0') + i++ + } + } + if i >= len(tmpl) { + break + } + // Consume arg only once per format spec. 
+ var arg Object + if argIdx < len(argList) { + arg = argList[argIdx] + argIdx++ + } else { + arg = pyNone + } + switch tmpl[i] { + case 's': + b.WriteString(arg.pyStr()) + case 'r': + b.WriteString(arg.pyRepr()) + case 'd': + switch v := arg.(type) { + case *PyInt: + n, _ := v.int64() + b.WriteString(strconv.FormatInt(n, 10)) + case *PyFloat: + b.WriteString(strconv.FormatInt(int64(v.v), 10)) + case *PyBool: + if v.v { + b.WriteString("1") + } else { + b.WriteString("0") + } + default: + b.WriteString("0") + } + case 'f': + var f float64 + switch v := arg.(type) { + case *PyFloat: + f = v.v + case *PyInt: + n, _ := v.int64() + f = float64(n) + } + digits := 6 + if prec >= 0 { + digits = prec + } + b.WriteString(strconv.FormatFloat(f, 'f', digits, 64)) + case 'e', 'E': + var f float64 + switch v := arg.(type) { + case *PyFloat: + f = v.v + case *PyInt: + n, _ := v.int64() + f = float64(n) + } + digits := 6 + if prec >= 0 { + digits = prec + } + s := strconv.FormatFloat(f, 'e', digits, 64) + if tmpl[i] == 'E' { + s = strings.ToUpper(s) + } + b.WriteString(s) + case 'g', 'G': + var f float64 + switch v := arg.(type) { + case *PyFloat: + f = v.v + case *PyInt: + n, _ := v.int64() + f = float64(n) + } + digits := -1 + if prec >= 0 { + digits = prec + } + s := strconv.FormatFloat(f, 'g', digits, 64) + if tmpl[i] == 'G' { + s = strings.ToUpper(s) + } + b.WriteString(s) + case 'x': + if v, ok := arg.(*PyInt); ok { + n, _ := v.int64() + b.WriteString(strconv.FormatInt(n, 16)) + } + case 'X': + if v, ok := arg.(*PyInt); ok { + n, _ := v.int64() + b.WriteString(strings.ToUpper(strconv.FormatInt(n, 16))) + } + case 'o': + if v, ok := arg.(*PyInt); ok { + n, _ := v.int64() + b.WriteString(strconv.FormatInt(n, 8)) + } + case 'b': + if v, ok := arg.(*PyInt); ok { + n, _ := v.int64() + b.WriteString(strconv.FormatInt(n, 2)) + } + case 'c': + switch v := arg.(type) { + case *PyInt: + n, _ := v.int64() + b.WriteRune(rune(n)) + case *PyStr: + if len(v.v) > 0 { + r, _ := 
utf8.DecodeRuneInString(v.v) + b.WriteRune(r) + } + } + default: + b.WriteByte('%') + b.WriteByte(tmpl[i]) + } + i++ + } + return b.String() +} + +// ---- PyBytes ---- + +// PyBytes is the Python bytes type. +type PyBytes struct{ v []byte } + +func pyBytes(b []byte) *PyBytes { return &PyBytes{v: b} } +func (b *PyBytes) pyType() *PyType { return typeBytes } +func (b *PyBytes) pyRepr() string { + var sb strings.Builder + sb.WriteString("b'") + for _, c := range b.v { + switch c { + case '\'': + sb.WriteString("\\'") + case '\\': + sb.WriteString("\\\\") + case '\n': + sb.WriteString("\\n") + case '\r': + sb.WriteString("\\r") + case '\t': + sb.WriteString("\\t") + default: + if c < 32 || c >= 127 { + fmt.Fprintf(&sb, "\\x%02x", c) + } else { + sb.WriteByte(c) + } + } + } + sb.WriteByte('\'') + return sb.String() +} +func (b *PyBytes) pyStr() string { return b.pyRepr() } + +func bytesGetAttr(b *PyBytes, name string) (Object, bool) { + switch name { + case "hex": + return makeBuiltin("hex", func(args []Object, kwargs map[string]Object) Object { + result := make([]byte, len(b.v)*2) + const hexChars = "0123456789abcdef" + for i, c := range b.v { + result[i*2] = hexChars[c>>4] + result[i*2+1] = hexChars[c&0xf] + } + return pyStr(string(result)) + }), true + case "decode": + return makeBuiltin("decode", func(args []Object, kwargs map[string]Object) Object { + // Default: UTF-8 + s := string(b.v) + if !utf8.ValidString(s) { + panic(exceptionSignal{exc: newExceptionf(ExcUnicodeDecodeError, "invalid utf-8 sequence")}) + } + return pyStr(s) + }), true + } + return nil, false +} + +// ---- PyList ---- + +// PyList is the Python list type. 
+type PyList struct{ items []Object } + +func pyList(items []Object) *PyList { + if items == nil { + items = []Object{} + } + return &PyList{items: items} +} +func (l *PyList) pyType() *PyType { return typeList } +func (l *PyList) pyRepr() string { + parts := make([]string, len(l.items)) + for i, item := range l.items { + parts[i] = item.pyRepr() + } + return "[" + strings.Join(parts, ", ") + "]" +} +func (l *PyList) pyStr() string { return l.pyRepr() } + +func listGetAttr(l *PyList, name string) (Object, bool) { + switch name { + case "append": + return makeBuiltin("append", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("append() takes exactly 1 argument") + } + l.items = append(l.items, args[0]) + return pyNone + }), true + case "extend": + return makeBuiltin("extend", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("extend() takes exactly 1 argument") + } + items := collectIterable(args[0]) + l.items = append(l.items, items...) + return pyNone + }), true + case "insert": + return makeBuiltin("insert", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 2 { + raiseTypeError("insert() takes exactly 2 arguments") + } + idx := int(toIntVal(args[0])) + if idx < 0 { + idx = len(l.items) + idx + } + if idx < 0 { + idx = 0 + } + if idx > len(l.items) { + idx = len(l.items) + } + l.items = append(l.items, nil) + copy(l.items[idx+1:], l.items[idx:]) + l.items[idx] = args[1] + return pyNone + }), true + case "remove": + return makeBuiltin("remove", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("remove() takes exactly 1 argument") + } + for i, item := range l.items { + if pyEq(item, args[0]) { + l.items = append(l.items[:i], l.items[i+1:]...) 
+ return pyNone + } + } + raiseValueError("list.remove(x): x not in list") + return nil + }), true + case "pop": + return makeBuiltin("pop", func(args []Object, kwargs map[string]Object) Object { + if len(l.items) == 0 { + raiseIndexError("pop from empty list") + } + idx := len(l.items) - 1 + if len(args) > 0 { + idx = int(toIntVal(args[0])) + } + if idx < 0 { + idx = len(l.items) + idx + } + if idx < 0 || idx >= len(l.items) { + raiseIndexError("pop index out of range") + } + val := l.items[idx] + l.items = append(l.items[:idx], l.items[idx+1:]...) + return val + }), true + case "index": + return makeBuiltin("index", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("index() requires at least 1 argument") + } + for i, item := range l.items { + if pyEq(item, args[0]) { + return pyInt(int64(i)) + } + } + raiseValueError("%s is not in list", args[0].pyRepr()) + return nil + }), true + case "count": + return makeBuiltin("count", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("count() takes exactly 1 argument") + } + n := 0 + for _, item := range l.items { + if pyEq(item, args[0]) { + n++ + } + } + return pyInt(int64(n)) + }), true + case "sort": + return makeBuiltin("sort", func(args []Object, kwargs map[string]Object) Object { + reverse := false + var keyFn Object + if v, ok := kwargs["reverse"]; ok { + reverse = pyTruth(v) + } + if v, ok := kwargs["key"]; ok && v != pyNone { + keyFn = v + } + sortList(l.items, keyFn, reverse) + return pyNone + }), true + case "reverse": + return makeBuiltin("reverse", func(args []Object, kwargs map[string]Object) Object { + for i, j := 0, len(l.items)-1; i < j; i, j = i+1, j-1 { + l.items[i], l.items[j] = l.items[j], l.items[i] + } + return pyNone + }), true + case "copy": + return makeBuiltin("copy", func(args []Object, kwargs map[string]Object) Object { + items := make([]Object, len(l.items)) + copy(items, l.items) + return pyList(items) + 
}), true + case "clear": + return makeBuiltin("clear", func(args []Object, kwargs map[string]Object) Object { + l.items = []Object{} + return pyNone + }), true + } + return nil, false +} + +// ---- PyTuple ---- + +// PyTuple is the Python tuple type. +type PyTuple struct{ items []Object } + +func pyTuple(items []Object) *PyTuple { + if items == nil { + items = []Object{} + } + return &PyTuple{items: items} +} +func (t *PyTuple) pyType() *PyType { return typeTuple } +func (t *PyTuple) pyRepr() string { + if len(t.items) == 0 { + return "()" + } + parts := make([]string, len(t.items)) + for i, item := range t.items { + parts[i] = item.pyRepr() + } + if len(t.items) == 1 { + return "(" + parts[0] + ",)" + } + return "(" + strings.Join(parts, ", ") + ")" +} +func (t *PyTuple) pyStr() string { return t.pyRepr() } + +// ---- PyDict ---- + +// PyDict is the Python dict type, preserving insertion order. +type PyDict struct { + keys []Object + vals []Object + index map[any]int +} + +func pyDict() *PyDict { + return &PyDict{index: make(map[any]int)} +} + +func pyDictFromPairs(pairs [][2]Object) *PyDict { + d := pyDict() + for _, p := range pairs { + d.set(p[0], p[1]) + } + return d +} + +func (d *PyDict) get(key Object) (Object, bool) { + k, err := hashKey(key) + if err != nil { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, "unhashable type: '%s'", key.pyType().Name)}) + } + if idx, ok := d.index[k]; ok { + return d.vals[idx], true + } + return nil, false +} + +func (d *PyDict) set(key Object, val Object) { + k, err := hashKey(key) + if err != nil { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, "unhashable type: '%s'", key.pyType().Name)}) + } + if idx, ok := d.index[k]; ok { + d.vals[idx] = val + return + } + d.index[k] = len(d.keys) + d.keys = append(d.keys, key) + d.vals = append(d.vals, val) +} + +func (d *PyDict) del(key Object) bool { + k, err := hashKey(key) + if err != nil { + return false + } + idx, ok := d.index[k] + if !ok { + return false + 
} + // Remove from slice + d.keys = append(d.keys[:idx], d.keys[idx+1:]...) + d.vals = append(d.vals[:idx], d.vals[idx+1:]...) + // Rebuild index + delete(d.index, k) + for i := idx; i < len(d.keys); i++ { + k2, _ := hashKey(d.keys[i]) + d.index[k2] = i + } + return true +} + +func (d *PyDict) pyType() *PyType { return typeDict } +func (d *PyDict) pyRepr() string { + if len(d.keys) == 0 { + return "{}" + } + parts := make([]string, len(d.keys)) + for i := range d.keys { + parts[i] = d.keys[i].pyRepr() + ": " + d.vals[i].pyRepr() + } + return "{" + strings.Join(parts, ", ") + "}" +} +func (d *PyDict) pyStr() string { return d.pyRepr() } + +func dictGetAttr(d *PyDict, name string) (Object, bool) { + switch name { + case "get": + return makeBuiltin("get", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("get() requires at least 1 argument") + } + k, err := hashKey(args[0]) + if err != nil { + raiseTypeError("unhashable type: '%s'", args[0].pyType().Name) + } + if idx, ok := d.index[k]; ok { + return d.vals[idx] + } + if len(args) > 1 { + return args[1] + } + return pyNone + }), true + case "keys": + return makeBuiltin("keys", func(args []Object, kwargs map[string]Object) Object { + items := make([]Object, len(d.keys)) + copy(items, d.keys) + return pyList(items) + }), true + case "values": + return makeBuiltin("values", func(args []Object, kwargs map[string]Object) Object { + items := make([]Object, len(d.vals)) + copy(items, d.vals) + return pyList(items) + }), true + case "items": + return makeBuiltin("items", func(args []Object, kwargs map[string]Object) Object { + items := make([]Object, len(d.keys)) + for i := range d.keys { + items[i] = pyTuple([]Object{d.keys[i], d.vals[i]}) + } + return pyList(items) + }), true + case "update": + return makeBuiltin("update", func(args []Object, kwargs map[string]Object) Object { + if len(args) > 0 { + if other, ok := args[0].(*PyDict); ok { + for i, k := range other.keys { + d.set(k, 
other.vals[i]) + } + } + } + for k, v := range kwargs { + d.set(pyStr(k), v) + } + return pyNone + }), true + case "pop": + return makeBuiltin("pop", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("pop() requires at least 1 argument") + } + val, ok := d.get(args[0]) + if !ok { + if len(args) > 1 { + return args[1] + } + raiseKeyError(args[0]) + } + d.del(args[0]) + return val + }), true + case "setdefault": + return makeBuiltin("setdefault", func(args []Object, kwargs map[string]Object) Object { + if len(args) < 1 { + raiseTypeError("setdefault() requires at least 1 argument") + } + val, ok := d.get(args[0]) + if ok { + return val + } + def := Object(pyNone) + if len(args) > 1 { + def = args[1] + } + d.set(args[0], def) + return def + }), true + case "copy": + return makeBuiltin("copy", func(args []Object, kwargs map[string]Object) Object { + newD := pyDict() + for i, k := range d.keys { + newD.set(k, d.vals[i]) + } + return newD + }), true + case "clear": + return makeBuiltin("clear", func(args []Object, kwargs map[string]Object) Object { + d.keys = nil + d.vals = nil + d.index = make(map[any]int) + return pyNone + }), true + } + return nil, false +} + +// ---- PySet ---- + +// PySet is the Python set type. 
+type PySet struct { + items map[any]Object +} + +func pySet(items []Object) (*PySet, error) { + s := &PySet{items: make(map[any]Object)} + for _, item := range items { + k, err := hashKey(item) + if err != nil { + return nil, err + } + s.items[k] = item + } + return s, nil +} + +func (s *PySet) pyType() *PyType { return typeSet } +func (s *PySet) pyRepr() string { + if len(s.items) == 0 { + return "set()" + } + parts := make([]string, 0, len(s.items)) + for _, v := range s.items { + parts = append(parts, v.pyRepr()) + } + return "{" + strings.Join(parts, ", ") + "}" +} +func (s *PySet) pyStr() string { return s.pyRepr() } + +func setGetAttr(s *PySet, name string) (Object, bool) { + switch name { + case "add": + return makeBuiltin("add", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("add() takes exactly 1 argument") + } + k, err := hashKey(args[0]) + if err != nil { + raiseTypeError("unhashable type: '%s'", args[0].pyType().Name) + } + s.items[k] = args[0] + return pyNone + }), true + case "discard": + return makeBuiltin("discard", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("discard() takes exactly 1 argument") + } + k, err := hashKey(args[0]) + if err == nil { + delete(s.items, k) + } + return pyNone + }), true + case "remove": + return makeBuiltin("remove", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("remove() takes exactly 1 argument") + } + k, err := hashKey(args[0]) + if err != nil { + raiseTypeError("unhashable type: '%s'", args[0].pyType().Name) + } + if _, ok := s.items[k]; !ok { + raiseKeyError(args[0]) + } + delete(s.items, k) + return pyNone + }), true + case "pop": + return makeBuiltin("pop", func(args []Object, kwargs map[string]Object) Object { + for k, v := range s.items { + delete(s.items, k) + return v + } + raiseKeyError(pyStr("pop from an empty set")) + return nil + }), true + case "union": + return 
makeBuiltin("union", func(args []Object, kwargs map[string]Object) Object { + result := &PySet{items: make(map[any]Object)} + for k, v := range s.items { + result.items[k] = v + } + for _, arg := range args { + items := collectIterable(arg) + for _, item := range items { + k, err := hashKey(item) + if err != nil { + raiseTypeError("unhashable type: '%s'", item.pyType().Name) + } + result.items[k] = item + } + } + return result + }), true + case "intersection": + return makeBuiltin("intersection", func(args []Object, kwargs map[string]Object) Object { + result := &PySet{items: make(map[any]Object)} + if len(args) == 0 { + return result + } + other := args[0] + otherItems := collectIterable(other) + otherSet := make(map[any]bool) + for _, item := range otherItems { + k, err := hashKey(item) + if err == nil { + otherSet[k] = true + } + } + for k, v := range s.items { + if otherSet[k] { + result.items[k] = v + } + } + return result + }), true + case "difference": + return makeBuiltin("difference", func(args []Object, kwargs map[string]Object) Object { + result := &PySet{items: make(map[any]Object)} + for k, v := range s.items { + result.items[k] = v + } + for _, arg := range args { + items := collectIterable(arg) + for _, item := range items { + k, err := hashKey(item) + if err == nil { + delete(result.items, k) + } + } + } + return result + }), true + case "issubset": + return makeBuiltin("issubset", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + raiseTypeError("issubset() takes exactly 1 argument") + } + otherItems := collectIterable(args[0]) + otherSet := make(map[any]bool) + for _, item := range otherItems { + k, err := hashKey(item) + if err == nil { + otherSet[k] = true + } + } + for k := range s.items { + if !otherSet[k] { + return pyFalse + } + } + return pyTrue + }), true + case "issuperset": + return makeBuiltin("issuperset", func(args []Object, kwargs map[string]Object) Object { + if len(args) != 1 { + 
raiseTypeError("issuperset() takes exactly 1 argument") + } + otherItems := collectIterable(args[0]) + for _, item := range otherItems { + k, err := hashKey(item) + if err != nil { + raiseTypeError("unhashable type: '%s'", item.pyType().Name) + } + if _, ok := s.items[k]; !ok { + return pyFalse + } + } + return pyTrue + }), true + } + return nil, false +} + +// PyFrozenSet is the Python frozenset type. +type PyFrozenSet struct { + items map[any]Object +} + +func (s *PyFrozenSet) pyType() *PyType { return typeFrozenSet } +func (s *PyFrozenSet) pyRepr() string { + if len(s.items) == 0 { + return "frozenset()" + } + parts := make([]string, 0, len(s.items)) + for _, v := range s.items { + parts = append(parts, v.pyRepr()) + } + return "frozenset({" + strings.Join(parts, ", ") + "})" +} +func (s *PyFrozenSet) pyStr() string { return s.pyRepr() } + +// ---- PyFunction ---- + +// PyFunction represents a user-defined Python function. +type PyFunction struct { + Name string + Args *Arguments + Body []Stmt + Closure *Scope + Globals map[string]Object + Defaults []Object + KwDefaults map[string]Object + IsGen bool +} + +func (f *PyFunction) pyType() *PyType { return typeFunction } +func (f *PyFunction) pyRepr() string { return "" } +func (f *PyFunction) pyStr() string { return f.pyRepr() } + +// ---- PyBuiltin ---- + +// PyBuiltin is a built-in function or method. +type PyBuiltin struct { + Name string + Fn func(args []Object, kwargs map[string]Object) Object +} + +func (b *PyBuiltin) pyType() *PyType { return typeBuiltin } +func (b *PyBuiltin) pyRepr() string { return "" } +func (b *PyBuiltin) pyStr() string { return b.pyRepr() } + +func makeBuiltin(name string, fn func([]Object, map[string]Object) Object) *PyBuiltin { + return &PyBuiltin{Name: name, Fn: fn} +} + +// ---- PyClass and PyInstance ---- + +// PyClass represents a Python class (user-defined or built-in exception class). 
+type PyClass struct { + Name string + Bases []*PyClass + MRO []*PyClass + Dict map[string]Object +} + +func (c *PyClass) pyType() *PyType { return typeClass } +func (c *PyClass) pyRepr() string { return "" } +func (c *PyClass) pyStr() string { return c.pyRepr() } + +// computeMRO computes the C3 linearization of a class hierarchy. +func computeMRO(cls *PyClass) []*PyClass { + if len(cls.Bases) == 0 { + return []*PyClass{cls} + } + // Simple: cls + flatten bases + seen := map[*PyClass]bool{cls: true} + result := []*PyClass{cls} + var walk func(c *PyClass) + walk = func(c *PyClass) { + for _, base := range c.Bases { + if !seen[base] { + seen[base] = true + result = append(result, base) + walk(base) + } + } + } + walk(cls) + return result +} + +// PyInstance represents a Python object instance. +type PyInstance struct { + Class *PyClass + Dict map[string]Object +} + +func (i *PyInstance) pyType() *PyType { return typeClass } +func (i *PyInstance) pyRepr() string { + if reprFn, ok := i.lookupMethod("__repr__"); ok { + result := callObject(reprFn, []Object{i}, nil) + if s, ok := result.(*PyStr); ok { + return s.v + } + } + return "<" + i.Class.Name + " object>" +} +func (i *PyInstance) pyStr() string { + if strFn, ok := i.lookupMethod("__str__"); ok { + result := callObject(strFn, []Object{i}, nil) + if s, ok := result.(*PyStr); ok { + return s.v + } + } + return i.pyRepr() +} + +func (i *PyInstance) lookupMethod(name string) (Object, bool) { + if v, ok := i.Dict[name]; ok { + return v, true + } + for _, cls := range i.Class.MRO { + if v, ok := cls.Dict[name]; ok { + return v, true + } + } + return nil, false +} + +// goroutineCallFns maps goroutine ID → the active evaluator's callObject for that goroutine. +// Each Python execution registers its callObject before running and deregisters on return, +// so concurrent executions never share a function pointer. 
+var goroutineCallFns sync.Map // map[int64]func(Object, []Object, map[string]Object) Object + +// goroutineID returns the current goroutine's numeric ID by inspecting the stack header. +// Format: "goroutine N [..." +// +// Parsing runtime.Stack output is fragile — the format is undocumented. Returns (0, false) +// if the ID cannot be parsed so callers can degrade gracefully rather than crashing. +func goroutineID() (int64, bool) { + var buf [64]byte + runtime.Stack(buf[:], false) + var id int64 + for i := 10; i < len(buf); i++ { // skip "goroutine " + c := buf[i] + if c < '0' || c > '9' { + break + } + id = id*10 + int64(c-'0') + } + if id == 0 { + return 0, false + } + return id, true +} + +// callObject dispatches a call through the evaluator registered for the current goroutine. +func callObject(fn Object, args []Object, kwargs map[string]Object) Object { + gid, ok := goroutineID() + if !ok { + panic(exceptionSignal{exc: newExceptionf(ExcRuntimeError, "could not determine goroutine ID (runtime.Stack format changed)")}) + } + v, ok := goroutineCallFns.Load(gid) + if !ok { + panic("callObject invoked outside Python evaluation context") + } + return v.(func(Object, []Object, map[string]Object) Object)(fn, args, kwargs) +} + +// ---- PyModule ---- + +// PyModule represents a Python module. +type PyModule struct { + Name string + Dict map[string]Object +} + +func (m *PyModule) pyType() *PyType { return typeModule } +func (m *PyModule) pyRepr() string { return "" } +func (m *PyModule) pyStr() string { return m.pyRepr() } + +// ---- PyRange ---- + +// PyRange represents a Python range object. 
+type PyRange struct { + start, stop, step int64 +} + +func (r *PyRange) pyType() *PyType { return typeRange } +func (r *PyRange) pyRepr() string { + if r.step == 1 { + return fmt.Sprintf("range(%d, %d)", r.start, r.stop) + } + return fmt.Sprintf("range(%d, %d, %d)", r.start, r.stop, r.step) +} +func (r *PyRange) pyStr() string { return r.pyRepr() } + +func (r *PyRange) length() int64 { + if r.step > 0 { + if r.stop <= r.start { + return 0 + } + return (r.stop - r.start + r.step - 1) / r.step + } + if r.step < 0 { + if r.start <= r.stop { + return 0 + } + return (r.start - r.stop - r.step - 1) / (-r.step) + } + return 0 +} + +// rangeIter is the iterator for PyRange. +type rangeIter struct { + r *PyRange + cur int64 +} + +func (ri *rangeIter) next() (Object, bool) { + if ri.r.step > 0 && ri.cur >= ri.r.stop { + return nil, false + } + if ri.r.step < 0 && ri.cur <= ri.r.stop { + return nil, false + } + val := ri.cur + ri.cur += ri.r.step + return pyInt(val), true +} + +func (ri *rangeIter) pyType() *PyType { return typeRange } +func (ri *rangeIter) pyRepr() string { return "" } +func (ri *rangeIter) pyStr() string { return ri.pyRepr() } + +// ---- PyGenerator ---- + +// PyGenerator implements Python generators via goroutines. +type PyGenerator struct { + name string + sendCh chan Object // caller → generator + yieldCh chan Object // generator → caller + done bool + awaitingSend bool // true after a value has been received from yieldCh; the generator is blocked waiting for sendCh + excCh chan *PyException // generator sends exception at close + ctx context.Context // execution context; used by drainGenerator to respect cancellation +} + +func (g *PyGenerator) pyType() *PyType { return typeGenerator } +func (g *PyGenerator) pyRepr() string { return "" } +func (g *PyGenerator) pyStr() string { return g.pyRepr() } + +// ---- PyException ---- + +// TraceFrame is a single frame in a traceback. 
+type TraceFrame struct { + File string + Line int + Name string +} + +// PyException represents a Python exception instance. +type PyException struct { + ExcClass *PyClass + Args []Object + Cause *PyException + Context *PyException + Traceback []TraceFrame + Dict map[string]Object +} + +func (e *PyException) pyType() *PyType { return typeClass } +func (e *PyException) pyRepr() string { + if len(e.Args) == 0 { + return e.ExcClass.Name + "()" + } + if len(e.Args) == 1 { + return e.ExcClass.Name + "(" + e.Args[0].pyRepr() + ")" + } + parts := make([]string, len(e.Args)) + for i, a := range e.Args { + parts[i] = a.pyRepr() + } + return e.ExcClass.Name + "(" + strings.Join(parts, ", ") + ")" +} +func (e *PyException) pyStr() string { + if len(e.Args) == 0 { + return "" + } + if len(e.Args) == 1 { + return e.Args[0].pyStr() + } + parts := make([]string, len(e.Args)) + for i, a := range e.Args { + parts[i] = a.pyStr() + } + return "(" + strings.Join(parts, ", ") + ")" +} + +// Exception class singletons. 
+var ( + ExcBaseException = &PyClass{Name: "BaseException"} + ExcException = &PyClass{Name: "Exception", Bases: []*PyClass{ExcBaseException}} + ExcArithmeticError = &PyClass{Name: "ArithmeticError", Bases: []*PyClass{ExcException}} + ExcLookupError = &PyClass{Name: "LookupError", Bases: []*PyClass{ExcException}} + ExcValueError = &PyClass{Name: "ValueError", Bases: []*PyClass{ExcException}} + ExcTypeError = &PyClass{Name: "TypeError", Bases: []*PyClass{ExcException}} + ExcAttributeError = &PyClass{Name: "AttributeError", Bases: []*PyClass{ExcException}} + ExcNameError = &PyClass{Name: "NameError", Bases: []*PyClass{ExcException}} + ExcImportError = &PyClass{Name: "ImportError", Bases: []*PyClass{ExcException}} + ExcIndexError = &PyClass{Name: "IndexError", Bases: []*PyClass{ExcLookupError}} + ExcKeyError = &PyClass{Name: "KeyError", Bases: []*PyClass{ExcLookupError}} + ExcStopIteration = &PyClass{Name: "StopIteration", Bases: []*PyClass{ExcException}} + ExcGeneratorExit = &PyClass{Name: "GeneratorExit", Bases: []*PyClass{ExcBaseException}} + ExcRuntimeError = &PyClass{Name: "RuntimeError", Bases: []*PyClass{ExcException}} + ExcNotImplementedError = &PyClass{Name: "NotImplementedError", Bases: []*PyClass{ExcRuntimeError}} + ExcOSError = &PyClass{Name: "OSError", Bases: []*PyClass{ExcException}} + ExcFileNotFoundError = &PyClass{Name: "FileNotFoundError", Bases: []*PyClass{ExcOSError}} + ExcPermissionError = &PyClass{Name: "PermissionError", Bases: []*PyClass{ExcOSError}} + ExcZeroDivisionError = &PyClass{Name: "ZeroDivisionError", Bases: []*PyClass{ExcArithmeticError}} + ExcOverflowError = &PyClass{Name: "OverflowError", Bases: []*PyClass{ExcArithmeticError}} + ExcMemoryError = &PyClass{Name: "MemoryError", Bases: []*PyClass{ExcException}} + ExcKeyboardInterrupt = &PyClass{Name: "KeyboardInterrupt", Bases: []*PyClass{ExcBaseException}} + ExcSystemExit = &PyClass{Name: "SystemExit", Bases: []*PyClass{ExcBaseException}} + ExcAssertionError = &PyClass{Name: 
"AssertionError", Bases: []*PyClass{ExcException}} + ExcUnboundLocalError = &PyClass{Name: "UnboundLocalError", Bases: []*PyClass{ExcNameError}} + ExcRecursionError = &PyClass{Name: "RecursionError", Bases: []*PyClass{ExcRuntimeError}} + ExcUnicodeError = &PyClass{Name: "UnicodeError", Bases: []*PyClass{ExcValueError}} + ExcUnicodeDecodeError = &PyClass{Name: "UnicodeDecodeError", Bases: []*PyClass{ExcUnicodeError}} + ExcUnicodeEncodeError = &PyClass{Name: "UnicodeEncodeError", Bases: []*PyClass{ExcUnicodeError}} + ExcIOError = ExcOSError // alias +) + +func init() { + allExcClasses := []*PyClass{ + ExcBaseException, ExcException, ExcArithmeticError, ExcLookupError, + ExcValueError, ExcTypeError, ExcAttributeError, ExcNameError, + ExcImportError, ExcIndexError, ExcKeyError, ExcStopIteration, + ExcGeneratorExit, ExcRuntimeError, ExcNotImplementedError, ExcOSError, + ExcFileNotFoundError, ExcPermissionError, ExcZeroDivisionError, + ExcOverflowError, ExcMemoryError, ExcKeyboardInterrupt, ExcSystemExit, + ExcAssertionError, ExcUnboundLocalError, ExcRecursionError, ExcUnicodeError, + ExcUnicodeDecodeError, ExcUnicodeEncodeError, + } + for _, c := range allExcClasses { + c.MRO = computeMRO(c) + if c.Dict == nil { + c.Dict = make(map[string]Object) + } + } +} + +// newException creates a new PyException for the given class with message args. +func newException(cls *PyClass, args ...Object) *PyException { + return &PyException{ + ExcClass: cls, + Args: args, + Dict: make(map[string]Object), + } +} + +// newExceptionf creates a PyException with a formatted message string. +func newExceptionf(cls *PyClass, format string, a ...interface{}) *PyException { + msg := fmt.Sprintf(format, a...) + return &PyException{ + ExcClass: cls, + Args: []Object{pyStr(msg)}, + Dict: make(map[string]Object), + } +} + +// isInstance checks if obj is an instance of cls (walks MRO). 
+func isInstance(obj Object, cls *PyClass) bool { + switch v := obj.(type) { + case *PyException: + return exceptionMatchesClass(v, cls) + case *PyInstance: + for _, c := range v.Class.MRO { + if c == cls { + return true + } + } + return false + case *PyNone: + return cls.Name == "NoneType" + case *PyBool: + return cls.Name == "bool" || cls.Name == "int" + case *PyInt: + return cls.Name == "int" + case *PyFloat: + return cls.Name == "float" + case *PyStr: + return cls.Name == "str" + case *PyBytes: + return cls.Name == "bytes" + case *PyList: + return cls.Name == "list" + case *PyTuple: + return cls.Name == "tuple" + case *PyDict: + return cls.Name == "dict" + case *PySet: + return cls.Name == "set" + } + return false +} + +// exceptionMatchesClass checks if a PyException matches a class (by MRO walk). +func exceptionMatchesClass(exc *PyException, cls *PyClass) bool { + for _, c := range exc.ExcClass.MRO { + if c == cls { + return true + } + } + return false +} + +// ---- PyFile ---- + +const maxFileReadBytes = 1 << 20 // 1 MiB + +// PyFile represents a Python file object. 
+type PyFile struct { + rc io.ReadWriteCloser + w io.Writer + r *bufio.Reader + name string + binary bool + closed bool + buf []byte + bufDone bool +} + +func (f *PyFile) pyType() *PyType { return typeFile } +func (f *PyFile) pyRepr() string { + mode := "r" + if f.binary { + mode = "rb" + } + return fmt.Sprintf("<_io.TextIOWrapper name='%s' mode='%s' encoding='UTF-8'>", f.name, mode) +} +func (f *PyFile) pyStr() string { return f.pyRepr() } + +func fileGetAttr(f *PyFile, name string) (Object, bool) { + switch name { + case "read": + return makeBuiltin("read", func(args []Object, kwargs map[string]Object) Object { + if f.closed { + panic(exceptionSignal{exc: newExceptionf(ExcValueError, "I/O operation on closed file.")}) + } + n := -1 + if len(args) > 0 && args[0] != pyNone { + if v, ok := args[0].(*PyInt); ok { + if i, ok2 := v.int64(); ok2 { + n = int(i) + } + } + } + return f.read(n) + }), true + case "readline": + return makeBuiltin("readline", func(args []Object, kwargs map[string]Object) Object { + if f.closed { + panic(exceptionSignal{exc: newExceptionf(ExcValueError, "I/O operation on closed file.")}) + } + if f.r != nil { + // f.r is a bufio.Reader already wrapping a LimitReader (set up in + // runInternal), so we reuse it directly rather than wrapping again. + // Creating a fresh LimitReader per call would give each readline() + // call its own independent 1 MiB budget and would also discard + // buffered bytes from f.r's internal buffer. 
+ line, err := f.r.ReadString('\n') + if err != nil && err != io.EOF { + panic(exceptionSignal{exc: newExceptionf(ExcOSError, "readline error: %v", err)}) + } + if f.binary { + return pyBytes([]byte(line)) + } + return pyStr(line) + } + // For rc-based files + if !f.bufDone { + f.loadBuf() + } + idx := -1 + for i, b := range f.buf { + if b == '\n' { + idx = i + break + } + } + var line []byte + if idx < 0 { + line = f.buf + f.buf = nil + } else { + line = f.buf[:idx+1] + f.buf = f.buf[idx+1:] + } + if f.binary { + return pyBytes(line) + } + return pyStr(string(line)) + }), true + case "readlines": + return makeBuiltin("readlines", func(args []Object, kwargs map[string]Object) Object { + if f.closed { + panic(exceptionSignal{exc: newExceptionf(ExcValueError, "I/O operation on closed file.")}) + } + if !f.bufDone { + f.loadBuf() + } + lines := splitBytesLines(f.buf) + f.buf = nil + items := make([]Object, len(lines)) + for i, l := range lines { + if f.binary { + items[i] = pyBytes(l) + } else { + items[i] = pyStr(string(l)) + } + } + return pyList(items) + }), true + case "write": + return makeBuiltin("write", func(args []Object, kwargs map[string]Object) Object { + if f.closed { + panic(exceptionSignal{exc: newExceptionf(ExcValueError, "I/O operation on closed file.")}) + } + if len(args) != 1 { + raiseTypeError("write() takes exactly 1 argument") + } + var data []byte + switch v := args[0].(type) { + case *PyStr: + data = []byte(v.v) + case *PyBytes: + data = v.v + default: + raiseTypeError("write() argument must be str or bytes") + } + var err error + if f.w != nil { + _, err = f.w.Write(data) + } else if f.rc != nil { + // Files opened via open() are always read-only; block writes at the + // application layer rather than relying solely on OS rejection. 
+ panic(exceptionSignal{exc: newExceptionf(ExcPermissionError, "write() is not permitted on a file opened in read mode")}) + } + if err != nil { + panic(exceptionSignal{exc: newExceptionf(ExcOSError, "write error: %v", err)}) + } + return pyInt(int64(len(data))) + }), true + case "close": + return makeBuiltin("close", func(args []Object, kwargs map[string]Object) Object { + if !f.closed && f.rc != nil { + _ = f.rc.Close() + f.closed = true + } + return pyNone + }), true + case "__enter__": + return makeBuiltin("__enter__", func(args []Object, kwargs map[string]Object) Object { + return f + }), true + case "__exit__": + return makeBuiltin("__exit__", func(args []Object, kwargs map[string]Object) Object { + if !f.closed && f.rc != nil { + _ = f.rc.Close() + f.closed = true + } + return pyFalse + }), true + case "name": + return pyStr(f.name), true + case "closed": + return pyBool(f.closed), true + case "flush": + return makeBuiltin("flush", func(args []Object, kwargs map[string]Object) Object { + return pyNone + }), true + } + return nil, false +} + +func (f *PyFile) loadBuf() { + if f.bufDone { + return + } + f.bufDone = true + if f.rc != nil { + data, _ := io.ReadAll(io.LimitReader(f.rc, maxFileReadBytes+1)) + if len(data) > maxFileReadBytes { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "file content exceeds %d byte limit", maxFileReadBytes)}) + } + f.buf = data + } +} + +func (f *PyFile) read(n int) Object { + if f.r != nil { + // stdin-like reader + if n < 0 { + data, _ := io.ReadAll(io.LimitReader(f.r, maxFileReadBytes+1)) + if len(data) > maxFileReadBytes { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "stdin content exceeds %d byte limit", maxFileReadBytes)}) + } + if f.binary { + return pyBytes(data) + } + return pyStr(string(data)) + } + // Cap n to the per-file read limit to prevent OOM via large allocations. 
+ if n > maxFileReadBytes { + n = maxFileReadBytes + } + buf := make([]byte, n) + total := 0 + for total < n { + nr, err := f.r.Read(buf[total:]) + total += nr + if err != nil { + break + } + } + if f.binary { + return pyBytes(buf[:total]) + } + return pyStr(string(buf[:total])) + } + // rc-based file + if !f.bufDone { + f.loadBuf() + } + var chunk []byte + if n < 0 { + chunk = f.buf + f.buf = nil + } else { + if n > len(f.buf) { + n = len(f.buf) + } + chunk = f.buf[:n] + f.buf = f.buf[n:] + } + if f.binary { + return pyBytes(chunk) + } + return pyStr(string(chunk)) +} + +func splitBytesLines(b []byte) [][]byte { + var lines [][]byte + for len(b) > 0 { + idx := -1 + for i, c := range b { + if c == '\n' { + idx = i + break + } + } + if idx < 0 { + lines = append(lines, b) + break + } + lines = append(lines, b[:idx+1]) + b = b[idx+1:] + } + return lines +} + +// ---- PyBoundMethod ---- + +// PyBoundMethod binds a method to its self object. +type PyBoundMethod struct { + Self Object + Func *PyFunction +} + +func (m *PyBoundMethod) pyType() *PyType { return typeBoundMethod } +func (m *PyBoundMethod) pyRepr() string { + return "" +} +func (m *PyBoundMethod) pyStr() string { return m.pyRepr() } + +// ---- Scope ---- + +// Scope represents a variable scope (function frame or module level). 
+type Scope struct { + vars map[string]Object + parent *Scope + globals map[string]Object + globalNames map[string]bool + nonlocalNames map[string]bool + class *PyClass + funcName string + file string + line int +} + +func newModuleScope(globals map[string]Object) *Scope { + return &Scope{ + vars: globals, + globals: globals, + } +} + +func newFunctionScope(parent *Scope, globals map[string]Object, funcName string) *Scope { + return &Scope{ + vars: make(map[string]Object), + parent: parent, + globals: globals, + funcName: funcName, + } +} + +func (s *Scope) get(name string) (Object, bool) { + // Check globals declaration + if s.globalNames != nil && s.globalNames[name] { + if v, ok := s.globals[name]; ok { + return v, true + } + return nil, false + } + // Check nonlocal + if s.nonlocalNames != nil && s.nonlocalNames[name] { + p := s.parent + for p != nil && p.globals != nil { + if v, ok := p.vars[name]; ok { + return v, true + } + p = p.parent + } + return nil, false + } + // Local first + if v, ok := s.vars[name]; ok { + return v, true + } + // Walk up to globals (but not through sibling scopes) + if s.parent != nil && !s.isGlobalScope() { + return s.parent.get(name) + } + return nil, false +} + +func (s *Scope) set(name string, val Object) { + if s.globalNames != nil && s.globalNames[name] { + s.globals[name] = val + return + } + if s.nonlocalNames != nil && s.nonlocalNames[name] { + p := s.parent + for p != nil && !p.isGlobalScope() { + if _, ok := p.vars[name]; ok { + p.vars[name] = val + return + } + p = p.parent + } + // If not found, set in parent + if s.parent != nil { + s.parent.vars[name] = val + } + return + } + s.vars[name] = val +} + +// isGlobalScope returns true if this scope is the module/global scope. 
+func (s *Scope) isGlobalScope() bool { + return s.parent == nil +} + +func (s *Scope) delete(name string) bool { + if _, ok := s.vars[name]; ok { + delete(s.vars, name) + return true + } + return false +} + +// ---- Utility functions ---- + +// pyTruth returns the Python truth value of obj. +func pyTruth(obj Object) bool { + if obj == nil || obj == pyNone { + return false + } + switch v := obj.(type) { + case *PyBool: + return v.v + case *PyInt: + if v.big != nil { + return v.big.Sign() != 0 + } + return v.small != 0 + case *PyFloat: + return v.v != 0 + case *PyStr: + return len(v.v) > 0 + case *PyBytes: + return len(v.v) > 0 + case *PyList: + return len(v.items) > 0 + case *PyTuple: + return len(v.items) > 0 + case *PyDict: + return len(v.keys) > 0 + case *PySet: + return len(v.items) > 0 + case *PyFrozenSet: + return len(v.items) > 0 + case *PyRange: + return v.length() > 0 + } + return true +} + +// pyEq returns true if a == b (Python equality). +func pyEq(a, b Object) bool { + if a == b { + return true + } + if a == nil || b == nil { + return false + } + switch av := a.(type) { + case *PyNone: + _, ok := b.(*PyNone) + return ok + case *PyBool: + switch bv := b.(type) { + case *PyBool: + return av.v == bv.v + case *PyInt: + var ai int64 + if av.v { + ai = 1 + } + if n, ok := bv.int64(); ok { + return ai == n + } + } + case *PyInt: + switch bv := b.(type) { + case *PyInt: + if av.big == nil && bv.big == nil { + return av.small == bv.small + } + return av.toBigInt().Cmp(bv.toBigInt()) == 0 + case *PyBool: + var bi int64 + if bv.v { + bi = 1 + } + if n, ok := av.int64(); ok { + return n == bi + } + case *PyFloat: + if n, ok := av.int64(); ok { + return float64(n) == bv.v + } + } + case *PyFloat: + switch bv := b.(type) { + case *PyFloat: + return av.v == bv.v + case *PyInt: + if n, ok := bv.int64(); ok { + return av.v == float64(n) + } + } + case *PyStr: + if bv, ok := b.(*PyStr); ok { + return av.v == bv.v + } + case *PyBytes: + if bv, ok := b.(*PyBytes); ok { + 
if len(av.v) != len(bv.v) { + return false + } + for i := range av.v { + if av.v[i] != bv.v[i] { + return false + } + } + return true + } + case *PyList: + if bv, ok := b.(*PyList); ok { + if len(av.items) != len(bv.items) { + return false + } + for i := range av.items { + if !pyEq(av.items[i], bv.items[i]) { + return false + } + } + return true + } + case *PyTuple: + if bv, ok := b.(*PyTuple); ok { + if len(av.items) != len(bv.items) { + return false + } + for i := range av.items { + if !pyEq(av.items[i], bv.items[i]) { + return false + } + } + return true + } + } + return false +} + +// pyCompare returns -1, 0, +1 for a < b, a == b, a > b. +func pyCompare(a, b Object) int { + if pyEq(a, b) { + return 0 + } + switch av := a.(type) { + case *PyInt: + switch bv := b.(type) { + case *PyInt: + return av.toBigInt().Cmp(bv.toBigInt()) + case *PyFloat: + if n, ok := av.int64(); ok { + f := float64(n) + if f < bv.v { + return -1 + } else if f > bv.v { + return 1 + } + return 0 + } + case *PyBool: + var bi int64 + if bv.v { + bi = 1 + } + if n, ok := av.int64(); ok { + if n < bi { + return -1 + } else if n > bi { + return 1 + } + return 0 + } + } + case *PyFloat: + switch bv := b.(type) { + case *PyFloat: + if av.v < bv.v { + return -1 + } + return 1 + case *PyInt: + if n, ok := bv.int64(); ok { + f := float64(n) + if av.v < f { + return -1 + } else if av.v > f { + return 1 + } + return 0 + } + } + case *PyStr: + if bv, ok := b.(*PyStr); ok { + if av.v < bv.v { + return -1 + } + return 1 + } + case *PyBool: + var ai int64 + if av.v { + ai = 1 + } + switch bv := b.(type) { + case *PyBool: + var bi int64 + if bv.v { + bi = 1 + } + if ai < bi { + return -1 + } else if ai > bi { + return 1 + } + return 0 + case *PyInt: + if n, ok := bv.int64(); ok { + if ai < n { + return -1 + } else if ai > n { + return 1 + } + return 0 + } + } + case *PyList: + if bv, ok := b.(*PyList); ok { + minLen := len(av.items) + if len(bv.items) < minLen { + minLen = len(bv.items) + } + for i := 0; i 
< minLen; i++ { + c := pyCompare(av.items[i], bv.items[i]) + if c != 0 { + return c + } + } + if len(av.items) < len(bv.items) { + return -1 + } else if len(av.items) > len(bv.items) { + return 1 + } + return 0 + } + case *PyTuple: + if bv, ok := b.(*PyTuple); ok { + minLen := len(av.items) + if len(bv.items) < minLen { + minLen = len(bv.items) + } + for i := 0; i < minLen; i++ { + c := pyCompare(av.items[i], bv.items[i]) + if c != 0 { + return c + } + } + if len(av.items) < len(bv.items) { + return -1 + } else if len(av.items) > len(bv.items) { + return 1 + } + return 0 + } + case *PyBytes: + if bv, ok := b.(*PyBytes); ok { + for i := 0; i < len(av.v) && i < len(bv.v); i++ { + if av.v[i] < bv.v[i] { + return -1 + } + if av.v[i] > bv.v[i] { + return 1 + } + } + if len(av.v) < len(bv.v) { + return -1 + } else if len(av.v) > len(bv.v) { + return 1 + } + return 0 + } + } + raiseTypeError("'%s' not supported between instances of '%s' and '%s'", + "<", a.pyType().Name, b.pyType().Name) + return 0 +} + +// hashKey returns a comparable Go value for dict/set operations. 
+func hashKey(obj Object) (any, error) { + switch v := obj.(type) { + case *PyNone: + return nil, nil + case *PyBool: + if v.v { + return int64(1), nil + } + return int64(0), nil + case *PyInt: + if v.big == nil { + return v.small, nil + } + return v.big.String(), nil + case *PyFloat: + // If float is integer-valued, use the int key for consistency + if v.v == float64(int64(v.v)) { + return int64(v.v), nil + } + return v.v, nil + case *PyStr: + return v.v, nil + case *PyBytes: + return string(v.v), nil + case *PyTuple: + // Use a string encoding for tuples + parts := make([]string, len(v.items)) + for i, item := range v.items { + k, err := hashKey(item) + if err != nil { + return nil, err + } + parts[i] = fmt.Sprintf("%T:%v", k, k) + } + return "tuple:" + strings.Join(parts, ","), nil + case *PyList: + return nil, fmt.Errorf("unhashable type: 'list'") + case *PyDict: + return nil, fmt.Errorf("unhashable type: 'dict'") + case *PySet: + return nil, fmt.Errorf("unhashable type: 'set'") + case *PyClass: + return fmt.Sprintf("class:%p", v), nil + case *PyInstance: + return fmt.Sprintf("instance:%p", v), nil + case *PyFunction: + return fmt.Sprintf("function:%p", v), nil + case *PyBuiltin: + return fmt.Sprintf("builtin:%p", v), nil + } + return fmt.Sprintf("obj:%p", obj), nil +} + +// raiseTypeError panics with a TypeError. +func raiseTypeError(msg string, a ...interface{}) { + panic(exceptionSignal{exc: newExceptionf(ExcTypeError, msg, a...)}) +} + +// raiseValueError panics with a ValueError. +func raiseValueError(msg string, a ...interface{}) { + panic(exceptionSignal{exc: newExceptionf(ExcValueError, msg, a...)}) +} + +// raiseAttributeError panics with AttributeError. +func raiseAttributeError(typeName, attr string) { + panic(exceptionSignal{exc: newExceptionf(ExcAttributeError, "'%s' object has no attribute '%s'", typeName, attr)}) +} + +// raiseIndexError panics with IndexError. 
+func raiseIndexError(msg string) { + panic(exceptionSignal{exc: newExceptionf(ExcIndexError, "%s", msg)}) +} + +// raiseKeyError panics with KeyError for a key object. +func raiseKeyError(key Object) { + panic(exceptionSignal{exc: newException(ExcKeyError, key)}) +} + +// raiseNameError panics with NameError. +func raiseNameError(name string) { + panic(exceptionSignal{exc: newExceptionf(ExcNameError, "name '%s' is not defined", name)}) +} + +// normalizeIndex handles Python's negative indexing. +func normalizeIndex(i, length int) int { + if i < 0 { + i += length + } + return i +} + +// toNumber converts obj to a numeric type for arithmetic. +func toNumber(obj Object) (Object, bool) { + switch obj.(type) { + case *PyInt, *PyFloat, *PyBool: + return obj, true + } + return nil, false +} + +// toIntVal extracts an int64 from a PyInt or PyBool. +// If the value is a big integer that does not fit in int64 it raises +// IndexError (matching CPython's "cannot fit 'int' into an index-sized integer"). +func toIntVal(obj Object) int64 { + switch v := obj.(type) { + case *PyInt: + if n, ok := v.int64(); ok { + return n + } + panic(exceptionSignal{exc: newExceptionf(ExcIndexError, "cannot fit 'int' into an index-sized integer")}) + case *PyBool: + if v.v { + return 1 + } + return 0 + case *PyFloat: + return int64(v.v) + } + raiseTypeError("expected int, got %s", obj.pyType().Name) + return 0 +} + +// toIntValBig extracts a *big.Int from a PyInt, PyBool, or PyFloat. +// Unlike toIntVal it never truncates big integers. +func toIntValBig(obj Object) *big.Int { + switch v := obj.(type) { + case *PyInt: + return v.toBigInt() + case *PyBool: + if v.v { + return big.NewInt(1) + } + return big.NewInt(0) + case *PyFloat: + return new(big.Int).SetInt64(int64(v.v)) + } + raiseTypeError("expected int, got %s", obj.pyType().Name) + return nil +} + +// collectIterable collects all items from an iterable into a slice. 
+func collectIterable(obj Object) []Object { + switch v := obj.(type) { + case *PyList: + result := make([]Object, len(v.items)) + copy(result, v.items) + return result + case *PyTuple: + result := make([]Object, len(v.items)) + copy(result, v.items) + return result + case *PyStr: + runes := []rune(v.v) + if len(runes) > maxGeneratorItems { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "string too large to iterate (length %d exceeds limit %d)", len(runes), maxGeneratorItems)}) + } + result := make([]Object, len(runes)) + for i, r := range runes { + result[i] = pyStr(string(r)) + } + return result + case *PyBytes: + if len(v.v) > maxGeneratorItems { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "bytes too large to iterate (length %d exceeds limit %d)", len(v.v), maxGeneratorItems)}) + } + result := make([]Object, len(v.v)) + for i, b := range v.v { + result[i] = pyInt(int64(b)) + } + return result + case *PyRange: + n := v.length() + // Guard against huge range lengths (e.g. list(range(0, 1<<62))) that + // would cause make([]Object, n) to panic with "makeslice: len out of range". 
+ if n > maxGeneratorItems { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "range too large to materialize (length %d exceeds limit %d)", n, maxGeneratorItems)}) + } + result := make([]Object, n) + cur := v.start + for i := int64(0); i < n; i++ { + result[i] = pyInt(cur) + cur += v.step + } + return result + case *PyDict: + result := make([]Object, len(v.keys)) + copy(result, v.keys) + return result + case *PySet: + result := make([]Object, 0, len(v.items)) + for _, item := range v.items { + result = append(result, item) + } + return result + case *PyFrozenSet: + result := make([]Object, 0, len(v.items)) + for _, item := range v.items { + result = append(result, item) + } + return result + case *PyGenerator: + return drainGenerator(v) + case *rangeIter: + var result []Object + for { + item, ok := v.next() + if !ok { + break + } + result = append(result, item) + if len(result) > maxGeneratorItems { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "iterable produced too many items (limit %d)", maxGeneratorItems)}) + } + } + return result + case *PyMapIter: + var result []Object + for { + item, ok := v.next() + if !ok { + break + } + result = append(result, item) + if len(result) > maxGeneratorItems { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "iterable produced too many items (limit %d)", maxGeneratorItems)}) + } + } + return result + case *PyFilterIter: + var result []Object + for { + item, ok := v.next() + if !ok { + break + } + result = append(result, item) + if len(result) > maxGeneratorItems { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "iterable produced too many items (limit %d)", maxGeneratorItems)}) + } + } + return result + case *PyZipIter: + var result []Object + for { + item, ok := v.next() + if !ok { + break + } + result = append(result, item) + if len(result) > maxGeneratorItems { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "iterable produced too many items (limit %d)", 
maxGeneratorItems)}) + } + } + return result + case *PyEnumerateIter: + var result []Object + for { + item, ok := v.next() + if !ok { + break + } + result = append(result, item) + if len(result) > maxGeneratorItems { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "iterable produced too many items (limit %d)", maxGeneratorItems)}) + } + } + return result + case *PyReversedIter: + var result []Object + for { + item, ok := v.next() + if !ok { + break + } + result = append(result, item) + if len(result) > maxGeneratorItems { + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "iterable produced too many items (limit %d)", maxGeneratorItems)}) + } + } + return result + } + raiseTypeError("'%s' object is not iterable", obj.pyType().Name) + return nil +} + +// maxGeneratorItems is the maximum number of items drainGenerator will collect +// from an infinite generator before raising MemoryError (~128k items at 8 bytes each = 1 MiB). +const maxGeneratorItems = 1 << 17 // 128k items + +// drainGenerator collects all values from a generator, respecting context +// cancellation and capping the result at maxGeneratorItems to prevent OOM. +func drainGenerator(g *PyGenerator) []Object { + var result []Object + ctx := g.ctx + for !g.done { + if g.awaitingSend { + select { + case g.sendCh <- pyNone: + case <-ctx.Done(): + g.done = true + // The generator may be blocked on yieldCh <- val; drain it so the + // goroutine can exit rather than leaking. 
+ select { + case <-g.yieldCh: + default: + } + panic(exceptionSignal{exc: newExceptionf(ExcKeyboardInterrupt, "")}) + } + g.awaitingSend = false + } + select { + case val, ok := <-g.yieldCh: + if !ok { + g.done = true + return result + } + g.awaitingSend = true + result = append(result, val) + if len(result) > maxGeneratorItems { + g.done = true + panic(exceptionSignal{exc: newExceptionf(ExcMemoryError, "generator produced too many items (limit %d)", maxGeneratorItems)}) + } + case <-ctx.Done(): + g.done = true + // Non-blocking drain: if the generator goroutine is blocked on + // yieldCh <- val, receive that value so the goroutine can observe + // g.done == true (or ctx.Done()) and exit rather than hanging forever. + select { + case <-g.yieldCh: + default: + } + panic(exceptionSignal{exc: newExceptionf(ExcKeyboardInterrupt, "")}) + } + } + return result +} + +// nextFromIterable returns the next item from an iterable object. +// Returns (val, true) or (nil, false) at exhaustion. +func nextFromIterable(obj Object) (Object, bool) { + switch v := obj.(type) { + case *rangeIter: + return v.next() + case *PyMapIter: + return v.next() + case *PyFilterIter: + return v.next() + case *PyZipIter: + return v.next() + case *PyEnumerateIter: + return v.next() + case *PyReversedIter: + return v.next() + case *PyGenerator: + if v.done { + return nil, false + } + if v.awaitingSend { + v.sendCh <- pyNone + v.awaitingSend = false + } + val, ok := <-v.yieldCh + if !ok { + v.done = true + // Check if the generator exited due to a non-StopIteration exception + // and propagate it to the caller. + if v.excCh != nil { + select { + case exc := <-v.excCh: + panic(exceptionSignal{exc: exc}) + default: + } + } + return nil, false + } + v.awaitingSend = true + return val, true + case *PyList: + // Not really an iterator, but handle via index + return nil, false + } + return nil, false +} + +// sortList sorts a list in place using an optional key function. 
+func sortList(items []Object, keyFn Object, reverse bool) { + // Simple insertion sort (stable, correct for small lists) + getKey := func(item Object) Object { + if keyFn == nil { + return item + } + return callObject(keyFn, []Object{item}, nil) + } + for i := 1; i < len(items); i++ { + cur := items[i] // save before inner loop shifts elements + key := getKey(cur) + j := i + for j > 0 && func() bool { + c := pyCompare(getKey(items[j-1]), key) + if reverse { + return c < 0 + } + return c > 0 + }() { + items[j] = items[j-1] + j-- + } + items[j] = cur + } +} + +// mustStr extracts a string from an Object or raises TypeError. +func mustStr(obj Object, fnName string) string { + switch v := obj.(type) { + case *PyStr: + return v.v + } + raiseTypeError("%s() argument must be str, not '%s'", fnName, obj.pyType().Name) + return "" +} + +// ---- Lazy iterator types ---- + +// PyMapIter is a lazy map() iterator. +type PyMapIter struct { + fn Object + iters []Object // underlying iterators (as list slices for simplicity) + idx int + items [][]Object // pre-collected for each iterable +} + +func (m *PyMapIter) pyType() *PyType { return typeMapIter } +func (m *PyMapIter) pyRepr() string { return "" } +func (m *PyMapIter) pyStr() string { return m.pyRepr() } + +func (m *PyMapIter) next() (Object, bool) { + if m.idx >= len(m.items[0]) { + return nil, false + } + args := make([]Object, len(m.items)) + for i, items := range m.items { + if m.idx >= len(items) { + return nil, false + } + args[i] = items[m.idx] + } + m.idx++ + result := callObject(m.fn, args, nil) + return result, true +} + +// PyFilterIter is a lazy filter() iterator. 
+type PyFilterIter struct { + fn Object // nil means filter by truth + items []Object + idx int +} + +func (f *PyFilterIter) pyType() *PyType { return typeFilterIter } +func (f *PyFilterIter) pyRepr() string { return "" } +func (f *PyFilterIter) pyStr() string { return f.pyRepr() } + +func (f *PyFilterIter) next() (Object, bool) { + for f.idx < len(f.items) { + item := f.items[f.idx] + f.idx++ + if f.fn == nil || f.fn == pyNone { + if pyTruth(item) { + return item, true + } + } else { + result := callObject(f.fn, []Object{item}, nil) + if pyTruth(result) { + return item, true + } + } + } + return nil, false +} + +// PyZipIter is a lazy zip() iterator. +type PyZipIter struct { + items [][]Object + idx int +} + +func (z *PyZipIter) pyType() *PyType { return typeZipIter } +func (z *PyZipIter) pyRepr() string { return "" } +func (z *PyZipIter) pyStr() string { return z.pyRepr() } + +func (z *PyZipIter) next() (Object, bool) { + if len(z.items) == 0 { + return nil, false + } + for _, items := range z.items { + if z.idx >= len(items) { + return nil, false + } + } + tuple := make([]Object, len(z.items)) + for i, items := range z.items { + tuple[i] = items[z.idx] + } + z.idx++ + return pyTuple(tuple), true +} + +// PyEnumerateIter is a lazy enumerate() iterator. +type PyEnumerateIter struct { + items []Object + idx int + counter int64 +} + +func (e *PyEnumerateIter) pyType() *PyType { return typeEnumerateIter } +func (e *PyEnumerateIter) pyRepr() string { return "" } +func (e *PyEnumerateIter) pyStr() string { return e.pyRepr() } + +func (e *PyEnumerateIter) next() (Object, bool) { + if e.idx >= len(e.items) { + return nil, false + } + val := pyTuple([]Object{pyInt(e.counter), e.items[e.idx]}) + e.idx++ + e.counter++ + return val, true +} + +// PyReversedIter is a reversed iterator. 
+type PyReversedIter struct { + items []Object + idx int +} + +func (r *PyReversedIter) pyType() *PyType { return typeReversedIter } +func (r *PyReversedIter) pyRepr() string { return "" } +func (r *PyReversedIter) pyStr() string { return r.pyRepr() } + +func (r *PyReversedIter) next() (Object, bool) { + if r.idx < 0 { + return nil, false + } + val := r.items[r.idx] + r.idx-- + return val, true +} + +// PyListIter is a forward list iterator. +type PyListIter struct { + items []Object + idx int +} + +func (r *PyListIter) pyType() *PyType { return typeList } +func (r *PyListIter) pyRepr() string { return "" } +func (r *PyListIter) pyStr() string { return r.pyRepr() } + +func (r *PyListIter) next() (Object, bool) { + if r.idx >= len(r.items) { + return nil, false + } + val := r.items[r.idx] + r.idx++ + return val, true +} + +// PyDictKeyIter iterates over dict keys. +type PyDictKeyIter struct { + keys []Object + idx int +} + +func (d *PyDictKeyIter) pyType() *PyType { return typeDict } +func (d *PyDictKeyIter) pyRepr() string { return "" } +func (d *PyDictKeyIter) pyStr() string { return d.pyRepr() } + +func (d *PyDictKeyIter) next() (Object, bool) { + if d.idx >= len(d.keys) { + return nil, false + } + val := d.keys[d.idx] + d.idx++ + return val, true +} diff --git a/builtins/tests/python/python_fuzz_test.go b/builtins/tests/python/python_fuzz_test.go new file mode 100644 index 00000000..7e24bf60 --- /dev/null +++ b/builtins/tests/python/python_fuzz_test.go @@ -0,0 +1,87 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +package python_test + +import ( + "context" + "fmt" + "os" + "sync/atomic" + "testing" + "time" + + "github.com/DataDog/rshell/builtins/testutil" + "github.com/DataDog/rshell/interp" +) + +// FuzzPythonSource fuzzes arbitrary Python source code via python -c. +// The goal is to ensure the interpreter never panics regardless of input. +func FuzzPythonSource(f *testing.F) { + f.Add("print('hello')") + f.Add("import sys; sys.exit(0)") + f.Add("raise ValueError('oops')") + f.Add("def foo(: pass") // syntax error + f.Add("x = 1/0") // runtime error + f.Add("import os; os.system('id')") // sandbox violation + f.Add("open('/tmp/x', 'w')") // write blocked + f.Add("import tempfile; tempfile.mkstemp()") // blocked module + f.Add("while True: pass") // infinite loop (short ctx) + f.Add("print('a' * 10000)") // large output + f.Add("") // empty source + + baseDir := f.TempDir() + var counter atomic.Int64 + + f.Fuzz(func(t *testing.T, src string) { + dir, cleanup := testutil.FuzzIterDir(t, baseDir, &counter) + defer cleanup() + + // Use a tight timeout to prevent infinite loops from hanging the corpus. + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + script := fmt.Sprintf("python -c %q", src) + // We only care that it doesn't panic or hang. + testutil.RunScriptCtx(ctx, t, script, dir, interp.AllowedPaths([]string{dir})) + }) +} + +// FuzzPythonFileContent fuzzes arbitrary content in a script file. 
+func FuzzPythonFileContent(f *testing.F) { + f.Add([]byte("print('hello')\n")) + f.Add([]byte("import sys\nsys.exit(0)\n")) + f.Add([]byte("raise RuntimeError('oops')\n")) + f.Add([]byte("def foo(:\n pass\n")) // syntax error + f.Add([]byte("")) + + baseDir := f.TempDir() + var counter atomic.Int64 + + f.Fuzz(func(t *testing.T, content []byte) { + dir, cleanup := testutil.FuzzIterDir(t, baseDir, &counter) + defer cleanup() + + scriptPath := dir + "/script.py" + if err := writeFile(scriptPath, content); err != nil { + t.Skip("write error:", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + testutil.RunScriptCtx(ctx, t, "python script.py", dir, interp.AllowedPaths([]string{dir})) + }) +} + +func writeFile(path string, content []byte) error { + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + _, err = f.Write(content) + return err +} diff --git a/builtins/tests/python/python_test.go b/builtins/tests/python/python_test.go new file mode 100644 index 00000000..4df0a539 --- /dev/null +++ b/builtins/tests/python/python_test.go @@ -0,0 +1,339 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +package python_test + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/builtins/testutil" + "github.com/DataDog/rshell/interp" +) + +func cmdRun(t *testing.T, script, dir string) (stdout, stderr string, exitCode int) { + t.Helper() + return testutil.RunScript(t, script, dir, interp.AllowedPaths([]string{dir})) +} + +func cmdRunCtx(ctx context.Context, t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return testutil.RunScriptCtx(ctx, t, script, dir, interp.AllowedPaths([]string{dir})) +} + +// ---- Basic execution ---- + +func TestPrintInline(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, `python -c "print('hello')"`, dir) + assert.Equal(t, "hello\n", stdout) + assert.Empty(t, stderr) + assert.Equal(t, 0, code) +} + +func TestArithmetic(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, `python -c "print(2 + 3)"`, dir) + assert.Equal(t, "5\n", stdout) + assert.Empty(t, stderr) + assert.Equal(t, 0, code) +} + +func TestStringOps(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, `python -c "print('hello' + ' world')"`, dir) + assert.Equal(t, "hello world\n", stdout) + assert.Empty(t, stderr) + assert.Equal(t, 0, code) +} + +func TestHelpFlag(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, `python --help`, dir) + assert.Contains(t, stdout, "Usage: python") + assert.Empty(t, stderr) + assert.Equal(t, 0, code) +} + +func TestHelpShortFlag(t *testing.T) { + dir := t.TempDir() + stdout, _, code := cmdRun(t, `python -h`, dir) + assert.Contains(t, stdout, "Usage: python") + assert.Equal(t, 0, code) +} + +// ---- sys.exit ---- + +func TestSysExitZero(t *testing.T) { + dir := t.TempDir() + _, _, code := cmdRun(t, `python -c "import sys; sys.exit(0)"`, dir) + assert.Equal(t, 0, code) +} + +func TestSysExitNonzero(t 
*testing.T) { + dir := t.TempDir() + _, _, code := cmdRun(t, `python -c "import sys; sys.exit(42)"`, dir) + assert.Equal(t, 42, code) +} + +func TestSysExitOne(t *testing.T) { + dir := t.TempDir() + _, _, code := cmdRun(t, `python -c "import sys; sys.exit(1)"`, dir) + assert.Equal(t, 1, code) +} + +func TestSysExitPropagatesAsShellDollarQuestion(t *testing.T) { + dir := t.TempDir() + stdout, _, code := cmdRun(t, `python -c "import sys; sys.exit(7)"; echo "code=$?"`, dir) + assert.Equal(t, "code=7\n", stdout) + assert.Equal(t, 0, code) +} + +// ---- Script file execution ---- + +func TestRunScriptFile(t *testing.T) { + dir := t.TempDir() + err := os.WriteFile(filepath.Join(dir, "hello.py"), []byte(`print("hello from script")`+"\n"), 0644) + require.NoError(t, err) + stdout, stderr, code := cmdRun(t, `python hello.py`, dir) + assert.Equal(t, "hello from script\n", stdout) + assert.Empty(t, stderr) + assert.Equal(t, 0, code) +} + +func TestRunScriptFileWithArgs(t *testing.T) { + dir := t.TempDir() + err := os.WriteFile(filepath.Join(dir, "args.py"), []byte("import sys\nprint(sys.argv[1])\n"), 0644) + require.NoError(t, err) + stdout, _, code := cmdRun(t, `python args.py myarg`, dir) + assert.Equal(t, "myarg\n", stdout) + assert.Equal(t, 0, code) +} + +func TestMissingScriptFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python nonexistent.py`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "python:") + assert.Contains(t, stderr, "nonexistent.py") +} + +// ---- Stdin mode ---- + +func TestStdinDash(t *testing.T) { + dir := t.TempDir() + stdout, _, code := cmdRun(t, `echo "print('from stdin')" | python -`, dir) + assert.Equal(t, "from stdin\n", stdout) + assert.Equal(t, 0, code) +} + +// ---- File I/O via open() ---- + +func TestOpenReadFile(t *testing.T) { + dir := t.TempDir() + err := os.WriteFile(filepath.Join(dir, "data.txt"), []byte("content\n"), 0644) + require.NoError(t, err) + stdout, stderr, code := cmdRun(t, `python -c 
"f = open('data.txt'); print(f.read().strip()); f.close()"`, dir) + assert.Equal(t, "content\n", stdout) + assert.Empty(t, stderr) + assert.Equal(t, 0, code) +} + +func TestWithStatementOpenClose(t *testing.T) { + dir := t.TempDir() + err := os.WriteFile(filepath.Join(dir, "data.txt"), []byte("hello\n"), 0644) + require.NoError(t, err) + script := "python -c \"\nwith open('data.txt') as f:\n print(f.read().strip())\n\"" + stdout, stderr, code := cmdRun(t, script, dir) + assert.Equal(t, "hello\n", stdout) + assert.Empty(t, stderr) + assert.Equal(t, 0, code) +} + +// ---- Security sandbox ---- + +func TestOsSystemBlocked(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "import os; os.system('id')"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "AttributeError") +} + +func TestOsPopenBlocked(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "import os; os.popen('id')"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "AttributeError") +} + +func TestOsRemoveBlocked(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "import os; os.remove('/tmp/x')"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "AttributeError") +} + +func TestOsMkdirBlocked(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "import os; os.mkdir('/tmp/x')"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "AttributeError") +} + +func TestOsExeclBlocked(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "import os; os.execl('/bin/sh', 'sh')"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "AttributeError") +} + +func TestOpenWriteModeBlocked(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "open('/tmp/evil.txt', 'w')"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "PermissionError") +} + +func TestOpenAppendModeBlocked(t *testing.T) { + dir := 
t.TempDir() + _, stderr, code := cmdRun(t, `python -c "open('/tmp/evil.txt', 'a')"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "PermissionError") +} + +func TestOpenExclusiveCreateBlocked(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "open('/tmp/evil.txt', 'x')"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "PermissionError") +} + +func TestOpenReadWriteModeBlocked(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "open('/tmp/evil.txt', 'r+')"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "PermissionError") +} + +func TestOpenOutsideAllowedPaths(t *testing.T) { + dir := t.TempDir() + // Allowed paths is set to dir; /etc/passwd is outside it. + _, stderr, code := cmdRun(t, `python -c "open('/etc/passwd')"`, dir) + assert.Equal(t, 1, code) + assert.NotEmpty(t, stderr) +} + +func TestTempfileNeutered(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "import tempfile; tempfile.mkstemp()"`, dir) + assert.Equal(t, 1, code) + assert.NotEmpty(t, stderr) +} + +func TestGlobNeutered(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "import glob; glob.glob('*')"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "ImportError") +} + +// ---- Error handling ---- + +func TestSyntaxError(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "def foo("`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "SyntaxError") +} + +func TestRuntimeException(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "raise ValueError('oops')"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "ValueError") + assert.Contains(t, stderr, "oops") +} + +func TestDivisionByZero(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python -c "x = 1/0"`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "ZeroDivisionError") 
+} + +func TestUnknownFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `python --unknown-flag`, dir) + assert.Equal(t, 1, code) + assert.NotEmpty(t, stderr) +} + +// ---- Context cancellation ---- + +func TestContextCancellation(t *testing.T) { + dir := t.TempDir() + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + // Infinite loop — should be killed by context deadline. + _, _, code := cmdRunCtx(ctx, t, `python -c "while True: pass"`, dir) + // After context cancellation the shell returns exit code 1. + assert.Equal(t, 1, code) +} + +// ---- Stdlib availability ---- + +func TestMathModule(t *testing.T) { + dir := t.TempDir() + stdout, _, code := cmdRun(t, `python -c "import math; print(math.floor(3.7))"`, dir) + assert.Equal(t, "3\n", stdout) + assert.Equal(t, 0, code) +} + +func TestSysArgv(t *testing.T) { + dir := t.TempDir() + stdout, _, code := cmdRun(t, `python -c "import sys; print(sys.argv[0])"`, dir) + assert.Equal(t, "\n", stdout) + assert.Equal(t, 0, code) +} + +// ---- Output to stderr ---- + +func TestStderrOutput(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, `python -c "import sys; sys.stderr.write('err msg\n')"`, dir) + assert.Empty(t, stdout) + assert.Equal(t, "err msg\n", stderr) + assert.Equal(t, 0, code) +} + +// ---- Memory safety ---- + +func TestLargeOutputDoesNotCrash(t *testing.T) { + dir := t.TempDir() + // Print 100 lines — small enough to complete quickly but exercises output path. 
+ stdout, _, code := testutil.RunScript(t, `python -c " +for i in range(100): + print('line ' + str(i)) +"`, dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) + lines := strings.Split(strings.TrimSpace(stdout), "\n") + assert.Equal(t, 100, len(lines)) +} + +func TestReadlineFromFile(t *testing.T) { + dir := t.TempDir() + err := os.WriteFile(filepath.Join(dir, "lines.txt"), []byte("first\nsecond\nthird\n"), 0644) + require.NoError(t, err) + stdout, _, code := cmdRun(t, `python -c "f = open('lines.txt'); print(f.readline().strip())"`, dir) + assert.Equal(t, "first\n", stdout) + assert.Equal(t, 0, code) +} diff --git a/interp/register_builtins.go b/interp/register_builtins.go index d16f1b69..2d0bb475 100644 --- a/interp/register_builtins.go +++ b/interp/register_builtins.go @@ -25,6 +25,7 @@ import ( "github.com/DataDog/rshell/builtins/ping" printfcmd "github.com/DataDog/rshell/builtins/printf" pscmd "github.com/DataDog/rshell/builtins/ps" + "github.com/DataDog/rshell/builtins/python" "github.com/DataDog/rshell/builtins/sed" sortcmd "github.com/DataDog/rshell/builtins/sort" "github.com/DataDog/rshell/builtins/ss" @@ -60,6 +61,7 @@ func registerBuiltins() { sortcmd.Cmd, printfcmd.Cmd, pscmd.Cmd, + python.Cmd, sed.Cmd, ss.Cmd, strings_cmd.Cmd, diff --git a/tests/scenarios/cmd/python/basic/arithmetic.yaml b/tests/scenarios/cmd/python/basic/arithmetic.yaml new file mode 100644 index 00000000..5a440e42 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/arithmetic.yaml @@ -0,0 +1,10 @@ +description: python evaluates arithmetic expressions. 
+skip_assert_against_bash: true +input: + script: |+ + python -c "print(2 + 3)" +expect: + stdout: |+ + 5 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/binascii_module.yaml b/tests/scenarios/cmd/python/basic/binascii_module.yaml new file mode 100644 index 00000000..4e673b07 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/binascii_module.yaml @@ -0,0 +1,10 @@ +description: python can import and use the binascii module for hex encoding. +skip_assert_against_bash: true +input: + script: |+ + python -c "import binascii; binascii.hexlify(b'AB'); print('ok')" +expect: + stdout: |+ + ok + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/classes.yaml b/tests/scenarios/cmd/python/basic/classes.yaml new file mode 100644 index 00000000..422ea5aa --- /dev/null +++ b/tests/scenarios/cmd/python/basic/classes.yaml @@ -0,0 +1,31 @@ +description: python supports class definitions with __init__ and instance methods. +skip_assert_against_bash: true +setup: + files: + - path: counter.py + content: |+ + class Counter: + def __init__(self): + self.count = 0 + + def increment(self): + self.count += 1 + + def value(self): + return self.count + + c = Counter() + c.increment() + c.increment() + c.increment() + print(c.value()) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python counter.py +expect: + stdout: |+ + 3 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/dict_operations.yaml b/tests/scenarios/cmd/python/basic/dict_operations.yaml new file mode 100644 index 00000000..13743ebf --- /dev/null +++ b/tests/scenarios/cmd/python/basic/dict_operations.yaml @@ -0,0 +1,23 @@ +description: python supports dict creation, key access, and mutation. 
+skip_assert_against_bash: true +setup: + files: + - path: dicts.py + content: |+ + d = {'name': 'rshell', 'version': 1} + print(d['name']) + print(d['version']) + d['extra'] = 'ok' + print(len(d)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python dicts.py +expect: + stdout: |+ + rshell + 1 + 3 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/drain_iter_memory_limit.yaml b/tests/scenarios/cmd/python/basic/drain_iter_memory_limit.yaml new file mode 100644 index 00000000..3fad2654 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/drain_iter_memory_limit.yaml @@ -0,0 +1,27 @@ +description: python drainIter raises MemoryError when a custom iterator produces too many items. +skip_assert_against_bash: true +setup: + files: + - path: inf_iter.py + content: |+ + class Inf: + def __iter__(self): + return self + def __next__(self): + return 1 + + try: + for x in Inf(): + pass + except MemoryError: + print('MemoryError raised') + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python inf_iter.py +expect: + stdout: |+ + MemoryError raised + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/file_binary_read.yaml b/tests/scenarios/cmd/python/basic/file_binary_read.yaml new file mode 100644 index 00000000..97902b49 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/file_binary_read.yaml @@ -0,0 +1,16 @@ +description: open() in binary mode ('rb') reads file content without error. 
+skip_assert_against_bash: true +setup: + files: + - path: data.bin + content: "hello" + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python -c "f = open('data.bin', 'rb'); f.read(); f.close(); print('ok')" +expect: + stdout: |+ + ok + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/file_readline.yaml b/tests/scenarios/cmd/python/basic/file_readline.yaml new file mode 100644 index 00000000..89228808 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/file_readline.yaml @@ -0,0 +1,20 @@ +description: open().readline() reads one line at a time from a file. +skip_assert_against_bash: true +setup: + files: + - path: lines.txt + content: |+ + first + second + third + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python -c "f = open('lines.txt'); print(f.readline().strip()); print(f.readline().strip()); f.close()" +expect: + stdout: |+ + first + second + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/file_readlines.yaml b/tests/scenarios/cmd/python/basic/file_readlines.yaml new file mode 100644 index 00000000..6d6065d5 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/file_readlines.yaml @@ -0,0 +1,19 @@ +description: open().readlines() returns all lines of a file as a list. +skip_assert_against_bash: true +setup: + files: + - path: items.txt + content: |+ + alpha + beta + gamma + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python -c "lines = open('items.txt').readlines(); print(len(lines))" +expect: + stdout: |+ + 3 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/for_loop.yaml b/tests/scenarios/cmd/python/basic/for_loop.yaml new file mode 100644 index 00000000..9e2f8518 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/for_loop.yaml @@ -0,0 +1,20 @@ +description: python executes for loops over a range. 
+skip_assert_against_bash: true +setup: + files: + - path: loop.py + content: |+ + for i in range(1, 4): + print(i) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python loop.py +expect: + stdout: |+ + 1 + 2 + 3 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/functions.yaml b/tests/scenarios/cmd/python/basic/functions.yaml new file mode 100644 index 00000000..ce5e0e83 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/functions.yaml @@ -0,0 +1,22 @@ +description: python supports defining and calling user-defined functions. +skip_assert_against_bash: true +setup: + files: + - path: funcs.py + content: |+ + def multiply(a, b): + return a * b + + print(multiply(3, 4)) + print(multiply(2, 5)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python funcs.py +expect: + stdout: |+ + 12 + 10 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/help_flag.yaml b/tests/scenarios/cmd/python/basic/help_flag.yaml new file mode 100644 index 00000000..140ba18c --- /dev/null +++ b/tests/scenarios/cmd/python/basic/help_flag.yaml @@ -0,0 +1,9 @@ +description: python --help prints usage to stdout and exits 0. +skip_assert_against_bash: true +input: + script: |+ + python --help +expect: + stdout_contains: ["Usage: python"] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/if_else.yaml b/tests/scenarios/cmd/python/basic/if_else.yaml new file mode 100644 index 00000000..3e808402 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/if_else.yaml @@ -0,0 +1,23 @@ +description: python evaluates if/elif/else branches correctly. 
+skip_assert_against_bash: true +setup: + files: + - path: branch.py + content: |+ + x = 7 + if x > 10: + print("large") + elif x > 5: + print("medium") + else: + print("small") + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python branch.py +expect: + stdout: |+ + medium + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/json_loads_not_implemented.yaml b/tests/scenarios/cmd/python/basic/json_loads_not_implemented.yaml new file mode 100644 index 00000000..e57e4ec7 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/json_loads_not_implemented.yaml @@ -0,0 +1,19 @@ +description: json.loads raises ValueError (matching CPython's json.JSONDecodeError which is a subclass of ValueError) so callers using except ValueError can handle the error correctly. +skip_assert_against_bash: true +input: + script: |+ + python -c " + import json + try: + json.loads('{\"a\": 1}') + print('no error') + except ValueError: + print('ValueError raised') + except NotImplementedError: + print('NotImplementedError raised (wrong)') + " +expect: + stdout: |+ + ValueError raised + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/list_operations.yaml b/tests/scenarios/cmd/python/basic/list_operations.yaml new file mode 100644 index 00000000..46e667e2 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/list_operations.yaml @@ -0,0 +1,11 @@ +description: python supports list operations including append, len, and indexing. 
+skip_assert_against_bash: true +input: + script: |+ + python -c "lst = [10, 20, 30]; lst.append(40); print(len(lst)); print(lst[-1])" +expect: + stdout: |+ + 4 + 40 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/math_comb_perm_limit.yaml b/tests/scenarios/cmd/python/basic/math_comb_perm_limit.yaml new file mode 100644 index 00000000..9be15a1f --- /dev/null +++ b/tests/scenarios/cmd/python/basic/math_comb_perm_limit.yaml @@ -0,0 +1,30 @@ +description: math.comb and math.perm raise ValueError when k exceeds 10000 to prevent CPU exhaustion. +skip_assert_against_bash: true +input: + script: |+ + python -c " + import math + try: + x = math.comb(1000000, 500000) + print('no error') + except ValueError: + print('ValueError for math.comb large k') + + try: + x = math.perm(1000000, 500000) + print('no error') + except ValueError: + print('ValueError for math.perm large k') + + # Small values should work fine + print(math.comb(10, 3)) + print(math.perm(5, 2)) + " +expect: + stdout: |+ + ValueError for math.comb large k + ValueError for math.perm large k + 120 + 20 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/math_module.yaml b/tests/scenarios/cmd/python/basic/math_module.yaml new file mode 100644 index 00000000..1dbeb13c --- /dev/null +++ b/tests/scenarios/cmd/python/basic/math_module.yaml @@ -0,0 +1,11 @@ +description: python can import and use the math module. +skip_assert_against_bash: true +input: + script: |+ + python -c "import math; print(math.floor(3.7)); print(int(math.sqrt(16)))" +expect: + stdout: |+ + 3 + 4 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/multiline_inline.yaml b/tests/scenarios/cmd/python/basic/multiline_inline.yaml new file mode 100644 index 00000000..95254f77 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/multiline_inline.yaml @@ -0,0 +1,10 @@ +description: python -c handles multiline code using semicolons. 
+skip_assert_against_bash: true +input: + script: |+ + python -c "x = 1 + 2; print(x)" +expect: + stdout: |+ + 3 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/os_read_only.yaml b/tests/scenarios/cmd/python/basic/os_read_only.yaml new file mode 100644 index 00000000..64afbda4 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/os_read_only.yaml @@ -0,0 +1,10 @@ +description: python os.getenv always returns the default — host environment is not accessible. +skip_assert_against_bash: true +input: + script: |+ + python -c "import os; print(os.getenv('NONEXISTENT_KEY_XYZ', 'default'))" +expect: + stdout: |+ + default + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/print_inline.yaml b/tests/scenarios/cmd/python/basic/print_inline.yaml new file mode 100644 index 00000000..3dda4abc --- /dev/null +++ b/tests/scenarios/cmd/python/basic/print_inline.yaml @@ -0,0 +1,10 @@ +description: python -c executes inline Python code and prints output. +skip_assert_against_bash: true +input: + script: |+ + python -c "print('hello')" +expect: + stdout: |+ + hello + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/read_file.yaml b/tests/scenarios/cmd/python/basic/read_file.yaml new file mode 100644 index 00000000..250f09c4 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/read_file.yaml @@ -0,0 +1,17 @@ +description: python can read files via open() when they are within AllowedPaths. 
+skip_assert_against_bash: true +setup: + files: + - path: hello.txt + content: |+ + hello from file + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python -c "f = open('hello.txt'); print(f.read().strip()); f.close()" +expect: + stdout: |+ + hello from file + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/run_script_file.yaml b/tests/scenarios/cmd/python/basic/run_script_file.yaml new file mode 100644 index 00000000..7b1ff934 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/run_script_file.yaml @@ -0,0 +1,17 @@ +description: python executes a script file passed as a positional argument. +skip_assert_against_bash: true +setup: + files: + - path: hello.py + content: |+ + print("hello from script") + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python hello.py +expect: + stdout: |+ + hello from script + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/string_module.yaml b/tests/scenarios/cmd/python/basic/string_module.yaml new file mode 100644 index 00000000..24c1527c --- /dev/null +++ b/tests/scenarios/cmd/python/basic/string_module.yaml @@ -0,0 +1,10 @@ +description: python can import the string module and access character set constants. +skip_assert_against_bash: true +input: + script: |+ + python -c "import string; print(string.digits)" +expect: + stdout: |+ + 0123456789 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/string_operations.yaml b/tests/scenarios/cmd/python/basic/string_operations.yaml new file mode 100644 index 00000000..398ba856 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/string_operations.yaml @@ -0,0 +1,12 @@ +description: python supports string operations including concatenation, repetition, and slicing. 
+skip_assert_against_bash: true +input: + script: |+ + python -c "s = 'hello'; print(s + ' world'); print(s[1:4]); print(s * 2)" +expect: + stdout: |+ + hello world + ell + hellohello + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/sys_argv_inline.yaml b/tests/scenarios/cmd/python/basic/sys_argv_inline.yaml new file mode 100644 index 00000000..99adc9f9 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/sys_argv_inline.yaml @@ -0,0 +1,11 @@ +description: python -c with extra arguments populates sys.argv with an empty string as argv[0]. +skip_assert_against_bash: true +input: + script: |+ + python -c "import sys; print(sys.argv[0]); print(sys.argv[1])" hello +expect: + stdout: |+ + + hello + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/sys_argv_script.yaml b/tests/scenarios/cmd/python/basic/sys_argv_script.yaml new file mode 100644 index 00000000..8e980df0 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/sys_argv_script.yaml @@ -0,0 +1,21 @@ +description: extra arguments after the script name are available in sys.argv. +skip_assert_against_bash: true +setup: + files: + - path: show_args.py + content: |+ + import sys + for arg in sys.argv: + print(arg) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python show_args.py foo bar +expect: + stdout: |+ + show_args.py + foo + bar + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/sys_exit_nonzero.yaml b/tests/scenarios/cmd/python/basic/sys_exit_nonzero.yaml new file mode 100644 index 00000000..a694129c --- /dev/null +++ b/tests/scenarios/cmd/python/basic/sys_exit_nonzero.yaml @@ -0,0 +1,11 @@ +description: sys.exit(N) sets $? to N in the calling shell. +skip_assert_against_bash: true +input: + script: |+ + python -c "import sys; sys.exit(42)" + echo "exit was $?" 
+expect: + stdout: |+ + exit was 42 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/sys_exit_string.yaml b/tests/scenarios/cmd/python/basic/sys_exit_string.yaml new file mode 100644 index 00000000..fa45f126 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/sys_exit_string.yaml @@ -0,0 +1,10 @@ +description: sys.exit() with a non-integer argument prints the message to stderr and exits with code 1. +skip_assert_against_bash: true +input: + script: |+ + python -c "import sys; sys.exit('fatal error')" +expect: + stdout: |+ + stderr: |+ + fatal error + exit_code: 1 diff --git a/tests/scenarios/cmd/python/basic/sys_exit_zero.yaml b/tests/scenarios/cmd/python/basic/sys_exit_zero.yaml new file mode 100644 index 00000000..d2ff5377 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/sys_exit_zero.yaml @@ -0,0 +1,11 @@ +description: sys.exit(0) exits with code 0. +skip_assert_against_bash: true +input: + script: |+ + python -c "import sys; sys.exit(0)" + echo "after" +expect: + stdout: |+ + after + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/sys_platform.yaml b/tests/scenarios/cmd/python/basic/sys_platform.yaml new file mode 100644 index 00000000..8617b2c2 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/sys_platform.yaml @@ -0,0 +1,10 @@ +description: python sys.platform returns a non-empty string. +skip_assert_against_bash: true +input: + script: |+ + python -c "import sys; print(len(sys.platform) > 0)" +expect: + stdout: |+ + True + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/try_except.yaml b/tests/scenarios/cmd/python/basic/try_except.yaml new file mode 100644 index 00000000..ce1ecae1 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/try_except.yaml @@ -0,0 +1,22 @@ +description: python catches exceptions with try/except and continues execution. 
+skip_assert_against_bash: true +setup: + files: + - path: exc.py + content: |+ + try: + raise ValueError("test error") + except ValueError as e: + print("caught:", e) + print("done") + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python exc.py +expect: + stdout: |+ + caught: test error + done + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/while_loop.yaml b/tests/scenarios/cmd/python/basic/while_loop.yaml new file mode 100644 index 00000000..e4b14709 --- /dev/null +++ b/tests/scenarios/cmd/python/basic/while_loop.yaml @@ -0,0 +1,22 @@ +description: python executes while loops with a counter. +skip_assert_against_bash: true +setup: + files: + - path: while.py + content: |+ + n = 0 + while n < 3: + print(n) + n += 1 + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python while.py +expect: + stdout: |+ + 0 + 1 + 2 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/basic/with_statement.yaml b/tests/scenarios/cmd/python/basic/with_statement.yaml new file mode 100644 index 00000000..d1cea7ad --- /dev/null +++ b/tests/scenarios/cmd/python/basic/with_statement.yaml @@ -0,0 +1,25 @@ +description: python supports the with statement for context managers via a script file. 
+skip_assert_against_bash: true +setup: + files: + - path: data.txt + content: |+ + line one + line two + chmod: 0644 + - path: read_file.py + content: |+ + with open('data.txt') as f: + content = f.read() + print(content.strip()) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python read_file.py +expect: + stdout: |+ + line one + line two + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/abs_divmod_pow.yaml b/tests/scenarios/cmd/python/builtins/abs_divmod_pow.yaml new file mode 100644 index 00000000..f8851614 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/abs_divmod_pow.yaml @@ -0,0 +1,34 @@ +description: python abs(), divmod(), and pow() perform absolute value, combined division/modulo, and exponentiation including big integers. +skip_assert_against_bash: true +setup: + files: + - path: math_builtins.py + content: |+ + print(abs(-5)) + print(abs(3)) + print(divmod(10, 3)) + print(divmod(-7, 2)) + print(pow(2, 10)) + print(pow(3, 3, 10)) + # big int divmod (> int64 range) + big = 10**40 + q, r = divmod(big, 3) + print(q) + print(r) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python math_builtins.py +expect: + stdout: |+ + 5 + 3 + (3, 1) + (-4, 1) + 1024 + 7 + 3333333333333333333333333333333333333333 + 1 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/all_any.yaml b/tests/scenarios/cmd/python/builtins/all_any.yaml new file mode 100644 index 00000000..658a340e --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/all_any.yaml @@ -0,0 +1,27 @@ +description: python all() and any() test whether all or any elements are truthy. 
+skip_assert_against_bash: true +setup: + files: + - path: allany.py + content: |+ + print(all([True, True, True])) + print(all([True, False, True])) + print(any([False, False, True])) + print(any([False, False, False])) + print(all(x > 0 for x in [1, 2, 3])) + print(any(x > 5 for x in [1, 2, 3])) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python allany.py +expect: + stdout: |+ + True + False + True + False + True + False + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/bin_hex_oct.yaml b/tests/scenarios/cmd/python/builtins/bin_hex_oct.yaml new file mode 100644 index 00000000..4eb34d0c --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/bin_hex_oct.yaml @@ -0,0 +1,35 @@ +description: python bin(), hex(), and oct() convert integers to binary, hex, and octal strings including big integers. +skip_assert_against_bash: true +setup: + files: + - path: binhexoct.py + content: |+ + print(bin(10)) + print(bin(255)) + print(hex(255)) + print(hex(16)) + print(oct(8)) + print(oct(64)) + # big int (> int64 max) + big = 2**63 + print(bin(big)) + print(hex(big)) + print(oct(big)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python binhexoct.py +expect: + stdout: |+ + 0b1010 + 0b11111111 + 0xff + 0x10 + 0o10 + 0o100 + 0b1000000000000000000000000000000000000000000000000000000000000000 + 0x8000000000000000 + 0o1000000000000000000000 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/bytes_bytearray_oom_guard.yaml b/tests/scenarios/cmd/python/builtins/bytes_bytearray_oom_guard.yaml new file mode 100644 index 00000000..1ea25743 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/bytes_bytearray_oom_guard.yaml @@ -0,0 +1,21 @@ +description: bytes(n) and bytearray(n) with very large n raise MemoryError instead of OOM. 
+skip_assert_against_bash: true +input: + script: |+ + python -c " + try: + bytes(2**40) + print('no error') + except MemoryError as e: + print('bytes MemoryError ok') + try: + bytearray(2**40) + print('no error') + except MemoryError as e: + print('bytearray MemoryError ok') + " +expect: + stdout: |+ + bytes MemoryError ok + bytearray MemoryError ok + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/chr_ord.yaml b/tests/scenarios/cmd/python/builtins/chr_ord.yaml new file mode 100644 index 00000000..1c6fb53b --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/chr_ord.yaml @@ -0,0 +1,25 @@ +description: python chr() converts an integer to a character and ord() converts back. +skip_assert_against_bash: true +setup: + files: + - path: chrord.py + content: |+ + print(chr(65)) + print(chr(97)) + print(ord('A')) + print(ord('a')) + print(chr(ord('Z') + 1)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python chrord.py +expect: + stdout: |+ + A + a + 65 + 97 + [ + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/enumerate.yaml b/tests/scenarios/cmd/python/builtins/enumerate.yaml new file mode 100644 index 00000000..218fabff --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/enumerate.yaml @@ -0,0 +1,26 @@ +description: python enumerate() adds an index counter to an iterable, with optional positional start offset. 
+skip_assert_against_bash: true +setup: + files: + - path: enumerate.py + content: |+ + fruits = ["apple", "banana", "cherry"] + for i, fruit in enumerate(fruits): + print(i, fruit) + for i, fruit in enumerate(fruits, 1): + print(i, fruit) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python enumerate.py +expect: + stdout: |+ + 0 apple + 1 banana + 2 cherry + 1 apple + 2 banana + 3 cherry + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/filter.yaml b/tests/scenarios/cmd/python/builtins/filter.yaml new file mode 100644 index 00000000..c3b5f541 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/filter.yaml @@ -0,0 +1,10 @@ +description: python filter() selects elements from an iterable for which a function returns true. +skip_assert_against_bash: true +input: + script: |+ + python -c "nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; evens = list(filter(lambda x: x % 2 == 0, nums)); print(evens)" +expect: + stdout: |+ + [2, 4, 6, 8, 10] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/getattr_setattr_hasattr.yaml b/tests/scenarios/cmd/python/builtins/getattr_setattr_hasattr.yaml new file mode 100644 index 00000000..309715b6 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/getattr_setattr_hasattr.yaml @@ -0,0 +1,33 @@ +description: python getattr/setattr/hasattr/delattr read, set, check, and remove object attributes. 
+skip_assert_against_bash: true +setup: + files: + - path: attrs.py + content: |+ + class Point: + def __init__(self, x, y): + self.x = x + self.y = y + + p = Point(3, 4) + print(getattr(p, 'x')) + print(hasattr(p, 'z')) + setattr(p, 'z', 7) + print(getattr(p, 'z')) + print(hasattr(p, 'z')) + delattr(p, 'z') + print(hasattr(p, 'z')) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python attrs.py +expect: + stdout: |+ + 3 + False + 7 + True + False + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/id_stability.yaml b/tests/scenarios/cmd/python/builtins/id_stability.yaml new file mode 100644 index 00000000..02ed1822 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/id_stability.yaml @@ -0,0 +1,18 @@ +description: python id() returns a stable identifier for the same object across multiple calls. +skip_assert_against_bash: true +input: + script: |+ + python -c " + x = 'hello' + print(id(x) == id(x)) + a = [1, 2, 3] + print(id(a) == id(a)) + print(id(x) != id(a)) + " +expect: + stdout: |+ + True + True + True + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/index_bigint_raises.yaml b/tests/scenarios/cmd/python/builtins/index_bigint_raises.yaml new file mode 100644 index 00000000..0009d4c7 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/index_bigint_raises.yaml @@ -0,0 +1,22 @@ +description: python list/string/bytes indexing with a huge big-int raises IndexError instead of silently returning index 0. 
+skip_assert_against_bash: true +input: + script: |+ + python -c " + a = [1, 2, 3] + try: + x = a[2**80] + except IndexError as e: + print('list IndexError:', 'cannot fit' in str(e)) + + try: + x = 'abc'[2**80] + except IndexError as e: + print('str IndexError:', 'cannot fit' in str(e)) + " +expect: + stdout: |+ + list IndexError: True + str IndexError: True + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/isinstance.yaml b/tests/scenarios/cmd/python/builtins/isinstance.yaml new file mode 100644 index 00000000..3ff2010f --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/isinstance.yaml @@ -0,0 +1,27 @@ +description: python isinstance() checks whether an object is an instance of a type or tuple of types. +skip_assert_against_bash: true +setup: + files: + - path: isinstance.py + content: |+ + print(isinstance(42, int)) + print(isinstance("hello", str)) + print(isinstance(3.14, float)) + print(isinstance([1, 2], list)) + print(isinstance(42, (int, str))) + print(isinstance("x", (int, str))) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python isinstance.py +expect: + stdout: |+ + True + True + True + True + True + True + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/len.yaml b/tests/scenarios/cmd/python/builtins/len.yaml new file mode 100644 index 00000000..19818ad3 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/len.yaml @@ -0,0 +1,25 @@ +description: python len() returns the number of items in lists, strings, dicts, tuples, and sets. 
+skip_assert_against_bash: true +setup: + files: + - path: len.py + content: |+ + print(len([1, 2, 3])) + print(len("hello")) + print(len({"a": 1, "b": 2})) + print(len((1, 2, 3, 4))) + print(len({1, 2, 3})) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python len.py +expect: + stdout: |+ + 3 + 5 + 2 + 4 + 3 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/map.yaml b/tests/scenarios/cmd/python/builtins/map.yaml new file mode 100644 index 00000000..46cb8fb9 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/map.yaml @@ -0,0 +1,10 @@ +description: python map() applies a function to each element of an iterable. +skip_assert_against_bash: true +input: + script: |+ + python -c "nums = [1, 2, 3, 4, 5]; strs = list(map(str, nums)); print(strs)" +expect: + stdout: |+ + ['1', '2', '3', '4', '5'] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/min_max.yaml b/tests/scenarios/cmd/python/builtins/min_max.yaml new file mode 100644 index 00000000..1d77eef7 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/min_max.yaml @@ -0,0 +1,29 @@ +description: python min() and max() find the smallest/largest value with optional key function. 
+skip_assert_against_bash: true +setup: + files: + - path: minmax.py + content: |+ + nums = [3, 1, 4, 1, 5, 9, 2, 6] + print(min(nums)) + print(max(nums)) + words = ["apple", "fig", "banana"] + print(min(words, key=lambda w: len(w))) + print(max(words, key=lambda w: len(w))) + print(min(3, 7, 1, 9)) + print(max(3, 7, 1, 9)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python minmax.py +expect: + stdout: |+ + 1 + 9 + fig + banana + 1 + 9 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/print_kwargs.yaml b/tests/scenarios/cmd/python/builtins/print_kwargs.yaml new file mode 100644 index 00000000..5e355aee --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/print_kwargs.yaml @@ -0,0 +1,22 @@ +description: python print() supports sep and end keyword arguments to control output formatting. +skip_assert_against_bash: true +setup: + files: + - path: printkw.py + content: |+ + print("a", "b", "c", sep="-") + print("hello", end="") + print(" world") + print("x", "y", sep="") + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python printkw.py +expect: + stdout: |+ + a-b-c + hello world + xy + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/range_forms.yaml b/tests/scenarios/cmd/python/builtins/range_forms.yaml new file mode 100644 index 00000000..69b0c06f --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/range_forms.yaml @@ -0,0 +1,23 @@ +description: python range() supports stop-only, start/stop, start/stop/step, and negative step forms. 
+skip_assert_against_bash: true +setup: + files: + - path: range.py + content: |+ + print(list(range(5))) + print(list(range(2, 6))) + print(list(range(0, 10, 3))) + print(list(range(5, 0, -1))) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python range.py +expect: + stdout: |+ + [0, 1, 2, 3, 4] + [2, 3, 4, 5] + [0, 3, 6, 9] + [5, 4, 3, 2, 1] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/repr.yaml b/tests/scenarios/cmd/python/builtins/repr.yaml new file mode 100644 index 00000000..7937ce50 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/repr.yaml @@ -0,0 +1,25 @@ +description: python repr() returns a developer-readable string representation of an object. +skip_assert_against_bash: true +setup: + files: + - path: repr.py + content: |+ + print(repr("hello")) + print(repr(42)) + print(repr([1, 2, 3])) + print(repr(None)) + print(repr(True)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python repr.py +expect: + stdout: |+ + 'hello' + 42 + [1, 2, 3] + None + True + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/sorted_key.yaml b/tests/scenarios/cmd/python/builtins/sorted_key.yaml new file mode 100644 index 00000000..890315b0 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/sorted_key.yaml @@ -0,0 +1,22 @@ +description: python sorted() supports key function and reverse flag for custom ordering. 
+skip_assert_against_bash: true +setup: + files: + - path: sorted.py + content: |+ + words = ["banana", "fig", "apple", "date", "cherry"] + print(sorted(words)) + print(sorted(words, key=len)) + print(sorted(words, reverse=True)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python sorted.py +expect: + stdout: |+ + ['apple', 'banana', 'cherry', 'date', 'fig'] + ['fig', 'date', 'apple', 'banana', 'cherry'] + ['fig', 'date', 'cherry', 'banana', 'apple'] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/sum.yaml b/tests/scenarios/cmd/python/builtins/sum.yaml new file mode 100644 index 00000000..84ab1239 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/sum.yaml @@ -0,0 +1,22 @@ +description: python sum() adds all elements of an iterable with an optional start value. +skip_assert_against_bash: true +setup: + files: + - path: sum.py + content: |+ + nums = [1, 2, 3, 4, 5] + print(sum(nums)) + print(sum(nums, 10)) + print(sum(x * x for x in range(4))) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python sum.py +expect: + stdout: |+ + 15 + 25 + 14 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/type_constructors.yaml b/tests/scenarios/cmd/python/builtins/type_constructors.yaml new file mode 100644 index 00000000..d0d6a3fd --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/type_constructors.yaml @@ -0,0 +1,30 @@ +description: python type constructor functions convert between types. 
+skip_assert_against_bash: true +setup: + files: + - path: typecons.py + content: |+ + print(int("42")) + print(int(3.9)) + print(float("3.14")) + print(str(42)) + print(list((1, 2, 3))) + print(tuple([4, 5, 6])) + s = set([1, 2, 3, 2, 1]) + print(sorted(s)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python typecons.py +expect: + stdout: |+ + 42 + 3 + 3.14 + 42 + [1, 2, 3] + (4, 5, 6) + [1, 2, 3] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/builtins/zip.yaml b/tests/scenarios/cmd/python/builtins/zip.yaml new file mode 100644 index 00000000..90991e45 --- /dev/null +++ b/tests/scenarios/cmd/python/builtins/zip.yaml @@ -0,0 +1,22 @@ +description: python zip() pairs up elements from multiple iterables element-wise. +skip_assert_against_bash: true +setup: + files: + - path: zip.py + content: |+ + names = ["Alice", "Bob", "Charlie"] + scores = [95, 87, 92] + for name, score in zip(names, scores): + print(name, score) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python zip.py +expect: + stdout: |+ + Alice 95 + Bob 87 + Charlie 92 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/complex/csv_parser.yaml b/tests/scenarios/cmd/python/complex/csv_parser.yaml new file mode 100644 index 00000000..e28af003 --- /dev/null +++ b/tests/scenarios/cmd/python/complex/csv_parser.yaml @@ -0,0 +1,37 @@ +description: python script parses a CSV file and computes column sums. 
+skip_assert_against_bash: true +setup: + files: + - path: data.csv + content: |+ + name,score + alice,85 + bob,92 + carol,78 + chmod: 0644 + - path: csv_sum.py + content: |+ + total = 0 + count = 0 + with open('data.csv') as f: + lines = f.read().split('\n') + for line in lines[1:]: + if line: + parts = line.split(',') + total = total + int(parts[1]) + count = count + 1 + print('total: ' + str(total)) + print('count: ' + str(count)) + print('average: ' + str(total // count)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python csv_sum.py +expect: + stdout: |+ + total: 255 + count: 3 + average: 85 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/complex/exception_chain.yaml b/tests/scenarios/cmd/python/complex/exception_chain.yaml new file mode 100644 index 00000000..2db0597f --- /dev/null +++ b/tests/scenarios/cmd/python/complex/exception_chain.yaml @@ -0,0 +1,33 @@ +description: python script uses nested try/except blocks and reraises exceptions. +skip_assert_against_bash: true +setup: + files: + - path: exc_chain.py + content: |+ + def parse_int(s): + try: + return int(s) + except ValueError: + raise ValueError('not a number: ' + s) + + results = [] + for token in ['42', 'bad', '7']: + try: + results.append(parse_int(token)) + except ValueError as e: + results.append(-1) + + for r in results: + print(r) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python exc_chain.py +expect: + stdout: |+ + 42 + -1 + 7 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/complex/fibonacci.yaml b/tests/scenarios/cmd/python/complex/fibonacci.yaml new file mode 100644 index 00000000..0c178078 --- /dev/null +++ b/tests/scenarios/cmd/python/complex/fibonacci.yaml @@ -0,0 +1,30 @@ +description: python script computes Fibonacci numbers using recursion. 
+skip_assert_against_bash: true +setup: + files: + - path: fib.py + content: |+ + def fib(n): + if n <= 1: + return n + return fib(n - 1) + fib(n - 2) + + for i in range(8): + print(fib(i)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python fib.py +expect: + stdout: |+ + 0 + 1 + 1 + 2 + 3 + 5 + 8 + 13 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/complex/inheritance.yaml b/tests/scenarios/cmd/python/complex/inheritance.yaml new file mode 100644 index 00000000..bfa90986 --- /dev/null +++ b/tests/scenarios/cmd/python/complex/inheritance.yaml @@ -0,0 +1,39 @@ +description: python script uses class inheritance with method overriding. +skip_assert_against_bash: true +setup: + files: + - path: shapes.py + content: |+ + class Shape: + def area(self): + return 0 + + def describe(self): + return 'Shape with area ' + str(self.area()) + + class Rectangle(Shape): + def __init__(self, w, h): + self.w = w + self.h = h + + def area(self): + return self.w * self.h + + class Square(Rectangle): + def __init__(self, side): + Rectangle.__init__(self, side, side) + + shapes = [Rectangle(3, 4), Square(5)] + for s in shapes: + print(s.describe()) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python shapes.py +expect: + stdout: |+ + Shape with area 12 + Shape with area 25 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/complex/matrix_multiply.yaml b/tests/scenarios/cmd/python/complex/matrix_multiply.yaml new file mode 100644 index 00000000..155c6209 --- /dev/null +++ b/tests/scenarios/cmd/python/complex/matrix_multiply.yaml @@ -0,0 +1,31 @@ +description: python script multiplies two 2x2 matrices using nested lists. 
+skip_assert_against_bash: true +setup: + files: + - path: matmul.py + content: |+ + def matmul(A, B): + n = len(A) + C = [[0] * n for _ in range(n)] + for i in range(n): + for j in range(n): + for k in range(n): + C[i][j] = C[i][j] + A[i][k] * B[k][j] + return C + + A = [[1, 2], [3, 4]] + B = [[5, 6], [7, 8]] + C = matmul(A, B) + for row in C: + print(str(row[0]) + ' ' + str(row[1])) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python matmul.py +expect: + stdout: |+ + 19 22 + 43 50 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/complex/multi_file.yaml b/tests/scenarios/cmd/python/complex/multi_file.yaml new file mode 100644 index 00000000..10f9c106 --- /dev/null +++ b/tests/scenarios/cmd/python/complex/multi_file.yaml @@ -0,0 +1,31 @@ +description: python script processes multiple input files passed via sys.argv. +skip_assert_against_bash: true +setup: + files: + - path: a.txt + content: "10\n20\n30\n" + chmod: 0644 + - path: b.txt + content: "5\n15\n" + chmod: 0644 + - path: sum_files.py + content: |+ + import sys + total = 0 + for path in sys.argv[1:]: + with open(path) as f: + for line in f.readlines(): + line = line.strip() + if line: + total = total + int(line) + print(total) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python sum_files.py a.txt b.txt +expect: + stdout: |+ + 80 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/complex/sieve.yaml b/tests/scenarios/cmd/python/complex/sieve.yaml new file mode 100644 index 00000000..08f97094 --- /dev/null +++ b/tests/scenarios/cmd/python/complex/sieve.yaml @@ -0,0 +1,41 @@ +description: python script implements the Sieve of Eratosthenes to find primes. 
+skip_assert_against_bash: true +setup: + files: + - path: sieve.py + content: |+ + def sieve(n): + is_prime = [True] * (n + 1) + is_prime[0] = False + is_prime[1] = False + i = 2 + while i * i <= n: + if is_prime[i]: + j = i * i + while j <= n: + is_prime[j] = False + j = j + i + i = i + 1 + return [x for x in range(n + 1) if is_prime[x]] + + for p in sieve(30): + print(p) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python sieve.py +expect: + stdout: |+ + 2 + 3 + 5 + 7 + 11 + 13 + 17 + 19 + 23 + 29 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/complex/word_count.yaml b/tests/scenarios/cmd/python/complex/word_count.yaml new file mode 100644 index 00000000..fdd9a35d --- /dev/null +++ b/tests/scenarios/cmd/python/complex/word_count.yaml @@ -0,0 +1,30 @@ +description: python script reads a file and counts word frequencies. +skip_assert_against_bash: true +setup: + files: + - path: text.txt + content: "apple banana apple cherry banana apple\n" + chmod: 0644 + - path: wc.py + content: |+ + counts = {} + with open('text.txt') as f: + for word in f.read().split(): + if word in counts: + counts[word] = counts[word] + 1 + else: + counts[word] = 1 + for word in ['apple', 'banana', 'cherry']: + print(word + ': ' + str(counts[word])) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python wc.py +expect: + stdout: |+ + apple: 3 + banana: 2 + cherry: 1 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/comprehensions/dict_comprehension.yaml b/tests/scenarios/cmd/python/comprehensions/dict_comprehension.yaml new file mode 100644 index 00000000..66e9edd1 --- /dev/null +++ b/tests/scenarios/cmd/python/comprehensions/dict_comprehension.yaml @@ -0,0 +1,22 @@ +description: python dict comprehension builds a dict mapping string keys to computed values. 
+skip_assert_against_bash: true +setup: + files: + - path: dictcomp.py + content: |+ + words = ["apple", "banana", "cherry"] + word_lens = {w: len(w) for w in words} + for k in sorted(word_lens.keys()): + print(k, word_lens[k]) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python dictcomp.py +expect: + stdout: |+ + apple 5 + banana 6 + cherry 6 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/comprehensions/generator_expression.yaml b/tests/scenarios/cmd/python/comprehensions/generator_expression.yaml new file mode 100644 index 00000000..644991e8 --- /dev/null +++ b/tests/scenarios/cmd/python/comprehensions/generator_expression.yaml @@ -0,0 +1,10 @@ +description: python generator expression lazily computes values and is consumed by sum. +skip_assert_against_bash: true +input: + script: |+ + python -c "total = sum(x * x for x in range(5)); print(total)" +expect: + stdout: |+ + 30 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/comprehensions/list_comprehension.yaml b/tests/scenarios/cmd/python/comprehensions/list_comprehension.yaml new file mode 100644 index 00000000..0931e48e --- /dev/null +++ b/tests/scenarios/cmd/python/comprehensions/list_comprehension.yaml @@ -0,0 +1,10 @@ +description: python list comprehension builds a list by applying an expression to each element. +skip_assert_against_bash: true +input: + script: |+ + python -c "squares = [x * x for x in range(5)]; print(squares)" +expect: + stdout: |+ + [0, 1, 4, 9, 16] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/comprehensions/list_comprehension_filtered.yaml b/tests/scenarios/cmd/python/comprehensions/list_comprehension_filtered.yaml new file mode 100644 index 00000000..00fdc723 --- /dev/null +++ b/tests/scenarios/cmd/python/comprehensions/list_comprehension_filtered.yaml @@ -0,0 +1,10 @@ +description: python list comprehension with if filter selects matching elements. 
+skip_assert_against_bash: true +input: + script: |+ + python -c "evens = [x for x in range(10) if x % 2 == 0]; print(evens)" +expect: + stdout: |+ + [0, 2, 4, 6, 8] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/comprehensions/nested_list_comprehension.yaml b/tests/scenarios/cmd/python/comprehensions/nested_list_comprehension.yaml new file mode 100644 index 00000000..500450c2 --- /dev/null +++ b/tests/scenarios/cmd/python/comprehensions/nested_list_comprehension.yaml @@ -0,0 +1,10 @@ +description: python nested list comprehension flattens a 2D matrix into a 1D list. +skip_assert_against_bash: true +input: + script: |+ + python -c "matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]; flat = [x for row in matrix for x in row]; print(flat)" +expect: + stdout: |+ + [1, 2, 3, 4, 5, 6, 7, 8, 9] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/comprehensions/set_comprehension.yaml b/tests/scenarios/cmd/python/comprehensions/set_comprehension.yaml new file mode 100644 index 00000000..7985dafe --- /dev/null +++ b/tests/scenarios/cmd/python/comprehensions/set_comprehension.yaml @@ -0,0 +1,10 @@ +description: python set comprehension builds a set of unique computed values. +skip_assert_against_bash: true +input: + script: |+ + python -c "s = {x % 3 for x in range(9)}; print(sorted(s))" +expect: + stdout: |+ + [0, 1, 2] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/data_structures/extended_unpacking.yaml b/tests/scenarios/cmd/python/data_structures/extended_unpacking.yaml new file mode 100644 index 00000000..2bbb62fc --- /dev/null +++ b/tests/scenarios/cmd/python/data_structures/extended_unpacking.yaml @@ -0,0 +1,28 @@ +description: python extended unpacking with star expression captures remaining elements into a list. 
+skip_assert_against_bash: true +setup: + files: + - path: starpack.py + content: |+ + first, *rest = [1, 2, 3, 4, 5] + print(first) + print(rest) + *init, last = [1, 2, 3, 4, 5] + print(init) + print(last) + a, *b, c = [1, 2, 3, 4, 5] + print(a, b, c) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python starpack.py +expect: + stdout: |+ + 1 + [2, 3, 4, 5] + [1, 2, 3, 4] + 5 + 1 [2, 3, 4] 5 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/data_structures/set_operations.yaml b/tests/scenarios/cmd/python/data_structures/set_operations.yaml new file mode 100644 index 00000000..b2273927 --- /dev/null +++ b/tests/scenarios/cmd/python/data_structures/set_operations.yaml @@ -0,0 +1,30 @@ +description: python set supports union, intersection, difference, and add operations. +skip_assert_against_bash: true +setup: + files: + - path: setops.py + content: |+ + a = {1, 2, 3, 4} + b = {3, 4, 5, 6} + print(sorted(a | b)) + print(sorted(a & b)) + print(sorted(a - b)) + print(sorted(b - a)) + a.add(7) + print(7 in a) + print(1 in a) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python setops.py +expect: + stdout: |+ + [1, 2, 3, 4, 5, 6] + [3, 4] + [1, 2] + [5, 6] + True + True + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/data_structures/string_format_percent.yaml b/tests/scenarios/cmd/python/data_structures/string_format_percent.yaml new file mode 100644 index 00000000..452ddc8b --- /dev/null +++ b/tests/scenarios/cmd/python/data_structures/string_format_percent.yaml @@ -0,0 +1,26 @@ +description: python % operator formats strings with positional substitution for strings, ints, and floats. +skip_assert_against_bash: true +setup: + files: + - path: fmtpct.py + content: |+ + name = "world" + n = 42 + pi = 3.14159 + print("Hello, %s!" 
% name) + print("Number: %d" % n) + print("Float: %.2f" % pi) + print("%s has %d items" % ("list", 5)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python fmtpct.py +expect: + stdout: |+ + Hello, world! + Number: 42 + Float: 3.14 + list has 5 items + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/data_structures/string_methods.yaml b/tests/scenarios/cmd/python/data_structures/string_methods.yaml new file mode 100644 index 00000000..094b69fe --- /dev/null +++ b/tests/scenarios/cmd/python/data_structures/string_methods.yaml @@ -0,0 +1,29 @@ +description: python string methods strip, split, replace, find, startswith, endswith work correctly. +skip_assert_against_bash: true +setup: + files: + - path: strmethods.py + content: |+ + s = " Hello, World! " + print(s.strip()) + words = "one two three".split() + print(words) + print("hello world".replace("world", "Python")) + print("hello world".find("world")) + print("hello".startswith("hel")) + print("hello".endswith("lo")) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python strmethods.py +expect: + stdout: |+ + Hello, World! + ['one', 'two', 'three'] + hello Python + 6 + True + True + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/data_structures/tuple_unpacking.yaml b/tests/scenarios/cmd/python/data_structures/tuple_unpacking.yaml new file mode 100644 index 00000000..4e648b45 --- /dev/null +++ b/tests/scenarios/cmd/python/data_structures/tuple_unpacking.yaml @@ -0,0 +1,27 @@ +description: python tuple unpacking assigns multiple variables from a sequence in a single statement. 
+skip_assert_against_bash: true +setup: + files: + - path: unpack.py + content: |+ + a, b = 1, 2 + print(a, b) + x, y, z = (10, 20, 30) + print(x, y, z) + first, second = "ab" + print(first, second) + a, b = b, a + print(a, b) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python unpack.py +expect: + stdout: |+ + 1 2 + 10 20 30 + a b + 2 1 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/errors/index_error.yaml b/tests/scenarios/cmd/python/errors/index_error.yaml new file mode 100644 index 00000000..9d141608 --- /dev/null +++ b/tests/scenarios/cmd/python/errors/index_error.yaml @@ -0,0 +1,9 @@ +description: IndexError from out-of-bounds list access is reported to stderr and exits with code 1. +skip_assert_against_bash: true +input: + script: |+ + python -c "lst = [1, 2, 3]; print(lst[10])" +expect: + stdout: |+ + stderr_contains: ["IndexError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/errors/key_error.yaml b/tests/scenarios/cmd/python/errors/key_error.yaml new file mode 100644 index 00000000..5616af08 --- /dev/null +++ b/tests/scenarios/cmd/python/errors/key_error.yaml @@ -0,0 +1,9 @@ +description: KeyError from missing dict key is reported to stderr and exits with code 1. +skip_assert_against_bash: true +input: + script: |+ + python -c "d = {'a': 1}; print(d['b'])" +expect: + stdout: |+ + stderr_contains: ["KeyError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/errors/missing_script_file.yaml b/tests/scenarios/cmd/python/errors/missing_script_file.yaml new file mode 100644 index 00000000..c917fc00 --- /dev/null +++ b/tests/scenarios/cmd/python/errors/missing_script_file.yaml @@ -0,0 +1,10 @@ +description: python exits with code 1 when the script file does not exist. 
+skip_assert_against_bash: true +input: + allowed_paths: ["$DIR"] + script: |+ + python nonexistent.py +expect: + stdout: |+ + stderr_contains: ["python:", "nonexistent.py"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/errors/name_error.yaml b/tests/scenarios/cmd/python/errors/name_error.yaml new file mode 100644 index 00000000..c691c043 --- /dev/null +++ b/tests/scenarios/cmd/python/errors/name_error.yaml @@ -0,0 +1,9 @@ +description: NameError from an undefined variable is reported to stderr and exits with code 1. +skip_assert_against_bash: true +input: + script: |+ + python -c "print(undefined_variable)" +expect: + stdout: |+ + stderr_contains: ["NameError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/errors/runtime_exception.yaml b/tests/scenarios/cmd/python/errors/runtime_exception.yaml new file mode 100644 index 00000000..e00d9aed --- /dev/null +++ b/tests/scenarios/cmd/python/errors/runtime_exception.yaml @@ -0,0 +1,9 @@ +description: Unhandled Python exceptions exit with code 1 and print traceback to stderr. +skip_assert_against_bash: true +input: + script: |+ + python -c "raise ValueError('oops')" +expect: + stdout: |+ + stderr_contains: ["ValueError", "oops"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/errors/syntax_error.yaml b/tests/scenarios/cmd/python/errors/syntax_error.yaml new file mode 100644 index 00000000..58aee4c5 --- /dev/null +++ b/tests/scenarios/cmd/python/errors/syntax_error.yaml @@ -0,0 +1,9 @@ +description: Python syntax errors are reported to stderr and exit with code 1. 
+skip_assert_against_bash: true +input: + script: |+ + python -c "def foo(" +expect: + stdout: |+ + stderr_contains: ["SyntaxError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/errors/type_error.yaml b/tests/scenarios/cmd/python/errors/type_error.yaml new file mode 100644 index 00000000..bb397752 --- /dev/null +++ b/tests/scenarios/cmd/python/errors/type_error.yaml @@ -0,0 +1,9 @@ +description: TypeError from incompatible operand types is reported to stderr and exits with code 1. +skip_assert_against_bash: true +input: + script: |+ + python -c "print('hello' + 42)" +expect: + stdout: |+ + stderr_contains: ["TypeError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/errors/zero_division_error.yaml b/tests/scenarios/cmd/python/errors/zero_division_error.yaml new file mode 100644 index 00000000..bdd0af16 --- /dev/null +++ b/tests/scenarios/cmd/python/errors/zero_division_error.yaml @@ -0,0 +1,9 @@ +description: ZeroDivisionError is reported to stderr and exits with code 1. +skip_assert_against_bash: true +input: + script: |+ + python -c "print(1 / 0)" +expect: + stdout: |+ + stderr_contains: ["ZeroDivisionError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/exceptions/bare_raise.yaml b/tests/scenarios/cmd/python/exceptions/bare_raise.yaml new file mode 100644 index 00000000..45b84074 --- /dev/null +++ b/tests/scenarios/cmd/python/exceptions/bare_raise.yaml @@ -0,0 +1,28 @@ +description: python bare raise re-raises the current exception from within an except handler. 
+skip_assert_against_bash: true +setup: + files: + - path: bareraise.py + content: |+ + def risky(): + try: + raise RuntimeError("original") + except RuntimeError: + print("handling") + raise + + try: + risky() + except RuntimeError as e: + print("re-raised:", e) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python bareraise.py +expect: + stdout: |+ + handling + re-raised: original + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/exceptions/multiple_except_handlers.yaml b/tests/scenarios/cmd/python/exceptions/multiple_except_handlers.yaml new file mode 100644 index 00000000..8165c153 --- /dev/null +++ b/tests/scenarios/cmd/python/exceptions/multiple_except_handlers.yaml @@ -0,0 +1,36 @@ +description: python multiple except handlers match the first matching exception type, including tuple catch. +skip_assert_against_bash: true +setup: + files: + - path: multiexcept.py + content: |+ + for v in [1, 2, 0]: + try: + if v == 1: + raise ValueError("val") + elif v == 2: + raise TypeError("typ") + else: + print("ok") + except ValueError as e: + print("ValueError:", e) + except TypeError as e: + print("TypeError:", e) + + try: + raise IndexError("index") + except (ValueError, IndexError, KeyError) as e: + print("tuple catch:", e) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python multiexcept.py +expect: + stdout: |+ + ValueError: val + TypeError: typ + ok + tuple catch: index + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/exceptions/raise_from.yaml b/tests/scenarios/cmd/python/exceptions/raise_from.yaml new file mode 100644 index 00000000..67362d6d --- /dev/null +++ b/tests/scenarios/cmd/python/exceptions/raise_from.yaml @@ -0,0 +1,23 @@ +description: python raise X from Y chains exceptions, setting __cause__ on the new exception. 
+skip_assert_against_bash: true +setup: + files: + - path: raisefrom.py + content: |+ + try: + try: + int("not a number") + except ValueError as e: + raise TypeError("conversion failed") from e + except TypeError as e: + print("caught:", e) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python raisefrom.py +expect: + stdout: |+ + caught: conversion failed + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/exceptions/try_except_finally.yaml b/tests/scenarios/cmd/python/exceptions/try_except_finally.yaml new file mode 100644 index 00000000..356767f0 --- /dev/null +++ b/tests/scenarios/cmd/python/exceptions/try_except_finally.yaml @@ -0,0 +1,25 @@ +description: python try/except/finally catches the exception and runs cleanup in finally. +skip_assert_against_bash: true +setup: + files: + - path: excfinally.py + content: |+ + try: + raise ValueError("oops") + except ValueError as e: + print("caught:", e) + finally: + print("cleanup") + print("after") + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python excfinally.py +expect: + stdout: |+ + caught: oops + cleanup + after + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/exceptions/try_finally.yaml b/tests/scenarios/cmd/python/exceptions/try_finally.yaml new file mode 100644 index 00000000..4299ebf4 --- /dev/null +++ b/tests/scenarios/cmd/python/exceptions/try_finally.yaml @@ -0,0 +1,27 @@ +description: python try/finally runs the finally block even when the try block returns. 
+skip_assert_against_bash: true +setup: + files: + - path: finally.py + content: |+ + def test(): + try: + print("try") + return 1 + finally: + print("finally") + + result = test() + print("result:", result) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python finally.py +expect: + stdout: |+ + try + finally + result: 1 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/functions/default_args.yaml b/tests/scenarios/cmd/python/functions/default_args.yaml new file mode 100644 index 00000000..e7fd33dc --- /dev/null +++ b/tests/scenarios/cmd/python/functions/default_args.yaml @@ -0,0 +1,24 @@ +description: python function default argument values are used when the caller omits those arguments. +skip_assert_against_bash: true +setup: + files: + - path: defaults.py + content: |+ + def greet(name, greeting="Hello"): + print(greeting + ", " + name + "!") + + greet("Alice") + greet("Bob", "Hi") + greet("Charlie", greeting="Hey") + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python defaults.py +expect: + stdout: |+ + Hello, Alice! + Hi, Bob! + Hey, Charlie! + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/functions/kwargs_function.yaml b/tests/scenarios/cmd/python/functions/kwargs_function.yaml new file mode 100644 index 00000000..3d4eec73 --- /dev/null +++ b/tests/scenarios/cmd/python/functions/kwargs_function.yaml @@ -0,0 +1,23 @@ +description: python **kwargs collects extra keyword arguments into a dict. 
+skip_assert_against_bash: true +setup: + files: + - path: kwargs.py + content: |+ + def describe(**kwargs): + for key in sorted(kwargs.keys()): + print(key + "=" + str(kwargs[key])) + + describe(name="Alice", age=30, city="NYC") + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python kwargs.py +expect: + stdout: |+ + age=30 + city=NYC + name=Alice + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/functions/varargs.yaml b/tests/scenarios/cmd/python/functions/varargs.yaml new file mode 100644 index 00000000..b6ecfca6 --- /dev/null +++ b/tests/scenarios/cmd/python/functions/varargs.yaml @@ -0,0 +1,35 @@ +description: python *args collects extra positional arguments into a tuple. +skip_assert_against_bash: true +setup: + files: + - path: varargs.py + content: |+ + def sum_all(*args): + total = 0 + for x in args: + total += x + return total + + print(sum_all(1, 2, 3)) + print(sum_all(10, 20)) + print(sum_all()) + + def first_and_rest(first, *rest): + print(first) + print(list(rest)) + + first_and_rest(1, 2, 3, 4) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python varargs.py +expect: + stdout: |+ + 6 + 30 + 0 + 1 + [2, 3, 4] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/generators/basic_yield.yaml b/tests/scenarios/cmd/python/generators/basic_yield.yaml new file mode 100644 index 00000000..0bd245ca --- /dev/null +++ b/tests/scenarios/cmd/python/generators/basic_yield.yaml @@ -0,0 +1,25 @@ +description: python generator function uses yield to produce a sequence of values. 
+skip_assert_against_bash: true +setup: + files: + - path: gen.py + content: |+ + def count_up(n): + for i in range(n): + yield i + + for val in count_up(4): + print(val) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python gen.py +expect: + stdout: |+ + 0 + 1 + 2 + 3 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/generators/drain_generator_memory_limit.yaml b/tests/scenarios/cmd/python/generators/drain_generator_memory_limit.yaml new file mode 100644 index 00000000..924fc1ac --- /dev/null +++ b/tests/scenarios/cmd/python/generators/drain_generator_memory_limit.yaml @@ -0,0 +1,21 @@ +description: drainGenerator raises MemoryError when a generator produces too many items (list of infinite generator). +skip_assert_against_bash: true +input: + script: |+ + python -c " + def infinite(): + n = 0 + while True: + yield n + n += 1 + try: + list(infinite()) + print('no error') + except MemoryError as e: + print('MemoryError:', str(e)) + " +expect: + stdout: |+ + MemoryError: generator produced too many items (limit 131072) + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/generators/generator_exception_propagation.yaml b/tests/scenarios/cmd/python/generators/generator_exception_propagation.yaml new file mode 100644 index 00000000..977f0889 --- /dev/null +++ b/tests/scenarios/cmd/python/generators/generator_exception_propagation.yaml @@ -0,0 +1,28 @@ +description: python generator exceptions are propagated to the caller of next(), not silently swallowed. 
+skip_assert_against_bash: true +setup: + files: + - path: gen_exc.py + content: |+ + def gen(): + yield 1 + raise ValueError('oops') + yield 2 + + g = gen() + print(next(g)) + try: + next(g) + except ValueError as e: + print('caught:', e) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python gen_exc.py +expect: + stdout: |+ + 1 + caught: oops + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/generators/generator_send.yaml b/tests/scenarios/cmd/python/generators/generator_send.yaml new file mode 100644 index 00000000..d7166816 --- /dev/null +++ b/tests/scenarios/cmd/python/generators/generator_send.yaml @@ -0,0 +1,28 @@ +description: python generator send() passes a value into the generator at the yield point. +skip_assert_against_bash: true +setup: + files: + - path: gensend.py + content: |+ + def echo(): + while True: + val = yield + if val is None: + break + print("got:", val) + + g = echo() + next(g) + g.send("hello") + g.send("world") + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python gensend.py +expect: + stdout: |+ + got: hello + got: world + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/generators/generator_stopiteration.yaml b/tests/scenarios/cmd/python/generators/generator_stopiteration.yaml new file mode 100644 index 00000000..5ffb8aa0 --- /dev/null +++ b/tests/scenarios/cmd/python/generators/generator_stopiteration.yaml @@ -0,0 +1,29 @@ +description: python generator raises StopIteration when exhausted and next() is called. 
+skip_assert_against_bash: true +setup: + files: + - path: stopiter.py + content: |+ + def one_two(): + yield 1 + yield 2 + + g = one_two() + print(next(g)) + print(next(g)) + try: + next(g) + except StopIteration: + print("stopped") + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python stopiter.py +expect: + stdout: |+ + 1 + 2 + stopped + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/generators/yield_from.yaml b/tests/scenarios/cmd/python/generators/yield_from.yaml new file mode 100644 index 00000000..16b429cb --- /dev/null +++ b/tests/scenarios/cmd/python/generators/yield_from.yaml @@ -0,0 +1,28 @@ +description: python yield from delegates to a sub-generator, forwarding all its values. +skip_assert_against_bash: true +setup: + files: + - path: yieldfrom.py + content: |+ + def gen_a(): + yield 1 + yield 2 + + def gen_b(): + yield from gen_a() + yield 3 + + for v in gen_b(): + print(v) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python yieldfrom.py +expect: + stdout: |+ + 1 + 2 + 3 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/keywords/assert_fails.yaml b/tests/scenarios/cmd/python/keywords/assert_fails.yaml new file mode 100644 index 00000000..d109107f --- /dev/null +++ b/tests/scenarios/cmd/python/keywords/assert_fails.yaml @@ -0,0 +1,20 @@ +description: python assert raises AssertionError with message when condition is false. 
+skip_assert_against_bash: true +setup: + files: + - path: assert_fail.py + content: |+ + try: + assert False, "assertion message" + except AssertionError as e: + print("AssertionError:", e) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python assert_fail.py +expect: + stdout: |+ + AssertionError: assertion message + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/keywords/assert_passes.yaml b/tests/scenarios/cmd/python/keywords/assert_passes.yaml new file mode 100644 index 00000000..69136fee --- /dev/null +++ b/tests/scenarios/cmd/python/keywords/assert_passes.yaml @@ -0,0 +1,10 @@ +description: python assert passes when condition is true. +skip_assert_against_bash: true +input: + script: |+ + python -c "assert True; assert 1 == 1; assert 'hello' != 'world'; print('all asserts passed')" +expect: + stdout: |+ + all asserts passed + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/keywords/break_nested.yaml b/tests/scenarios/cmd/python/keywords/break_nested.yaml new file mode 100644 index 00000000..22df2a64 --- /dev/null +++ b/tests/scenarios/cmd/python/keywords/break_nested.yaml @@ -0,0 +1,23 @@ +description: python break exits only the innermost loop in nested loops. +skip_assert_against_bash: true +setup: + files: + - path: break.py + content: |+ + for i in range(3): + for j in range(3): + if j == 1: + break + print(i, j) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python break.py +expect: + stdout: |+ + 0 0 + 1 0 + 2 0 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/keywords/continue_nested.yaml b/tests/scenarios/cmd/python/keywords/continue_nested.yaml new file mode 100644 index 00000000..fb426caf --- /dev/null +++ b/tests/scenarios/cmd/python/keywords/continue_nested.yaml @@ -0,0 +1,26 @@ +description: python continue skips to the next iteration of only the innermost loop. 
+skip_assert_against_bash: true +setup: + files: + - path: continue.py + content: |+ + for i in range(3): + for j in range(3): + if j == 1: + continue + print(i, j) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python continue.py +expect: + stdout: |+ + 0 0 + 0 2 + 1 0 + 1 2 + 2 0 + 2 2 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/keywords/del_statement.yaml b/tests/scenarios/cmd/python/keywords/del_statement.yaml new file mode 100644 index 00000000..dde9bf39 --- /dev/null +++ b/tests/scenarios/cmd/python/keywords/del_statement.yaml @@ -0,0 +1,26 @@ +description: python del statement removes variables and list elements. +skip_assert_against_bash: true +setup: + files: + - path: del.py + content: |+ + x = 42 + del x + try: + print(x) + except NameError: + print("x deleted") + lst = [1, 2, 3] + del lst[1] + print(lst) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python del.py +expect: + stdout: |+ + x deleted + [1, 3] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/keywords/global_statement.yaml b/tests/scenarios/cmd/python/keywords/global_statement.yaml new file mode 100644 index 00000000..335a9627 --- /dev/null +++ b/tests/scenarios/cmd/python/keywords/global_statement.yaml @@ -0,0 +1,26 @@ +description: python global statement allows functions to modify module-level variables. 
+skip_assert_against_bash: true +setup: + files: + - path: global.py + content: |+ + counter = 0 + + def increment(): + global counter + counter += 1 + + increment() + increment() + increment() + print(counter) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python global.py +expect: + stdout: |+ + 3 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/keywords/in_not_in.yaml b/tests/scenarios/cmd/python/keywords/in_not_in.yaml new file mode 100644 index 00000000..c7044cec --- /dev/null +++ b/tests/scenarios/cmd/python/keywords/in_not_in.yaml @@ -0,0 +1,30 @@ +description: python in and not in operators test membership in lists, dicts, and strings. +skip_assert_against_bash: true +setup: + files: + - path: membership.py + content: |+ + lst = [1, 2, 3, 4, 5] + print(3 in lst) + print(6 not in lst) + d = {"key": "value"} + print("key" in d) + print("missing" not in d) + s = "hello world" + print("world" in s) + print("xyz" not in s) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python membership.py +expect: + stdout: |+ + True + True + True + True + True + True + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/keywords/is_is_not.yaml b/tests/scenarios/cmd/python/keywords/is_is_not.yaml new file mode 100644 index 00000000..875ce402 --- /dev/null +++ b/tests/scenarios/cmd/python/keywords/is_is_not.yaml @@ -0,0 +1,31 @@ +description: python is and is not operators test object identity. 
+skip_assert_against_bash: true +setup: + files: + - path: identity.py + content: |+ + x = None + print(x is None) + print(x is not None) + a = [1, 2, 3] + b = a + c = [1, 2, 3] + print(a is b) + print(a is not c) + print(True is True) + print(False is not True) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python identity.py +expect: + stdout: |+ + True + False + True + True + True + True + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/keywords/nonlocal_statement.yaml b/tests/scenarios/cmd/python/keywords/nonlocal_statement.yaml new file mode 100644 index 00000000..fe7d7f91 --- /dev/null +++ b/tests/scenarios/cmd/python/keywords/nonlocal_statement.yaml @@ -0,0 +1,30 @@ +description: python nonlocal statement allows nested functions to modify enclosing scope variables. +skip_assert_against_bash: true +setup: + files: + - path: nonlocal.py + content: |+ + def make_counter(): + count = 0 + def increment(): + nonlocal count + count += 1 + return count + return increment + + c = make_counter() + print(c()) + print(c()) + print(c()) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python nonlocal.py +expect: + stdout: |+ + 1 + 2 + 3 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/keywords/pass_statement.yaml b/tests/scenarios/cmd/python/keywords/pass_statement.yaml new file mode 100644 index 00000000..6b77f6b3 --- /dev/null +++ b/tests/scenarios/cmd/python/keywords/pass_statement.yaml @@ -0,0 +1,30 @@ +description: python pass statement works in if/for/while/def/class bodies. 
+skip_assert_against_bash: true +setup: + files: + - path: pass.py + content: |+ + if True: + pass + for i in range(3): + pass + n = 0 + while n < 2: + n += 1 + pass + def noop(): + pass + class Empty: + pass + noop() + print("done") + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python pass.py +expect: + stdout: |+ + done + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/keywords/with_statement.yaml b/tests/scenarios/cmd/python/keywords/with_statement.yaml new file mode 100644 index 00000000..49579130 --- /dev/null +++ b/tests/scenarios/cmd/python/keywords/with_statement.yaml @@ -0,0 +1,59 @@ +description: python with statement correctly passes exception class to __exit__, suppresses exceptions, and unwinds managers on __enter__ failure. +skip_assert_against_bash: true +input: + script: |+ + python -c " + # Test 1: __exit__ receives exception class as first argument + class CM: + def __enter__(self): return self + def __exit__(self, exc_type, exc_val, tb): + print('exc_type is class:', exc_type is ValueError) + return False + + try: + with CM(): + raise ValueError('test') + except ValueError: + pass + + # Test 2: inner __exit__ suppresses; outer sees (None, None, None) + class Suppressor: + def __enter__(self): return self + def __exit__(self, et, ev, tb): + return True # suppress + + class Outer: + def __enter__(self): return self + def __exit__(self, et, ev, tb): + print('outer exc_type after suppress:', et) + return False + + with Outer() as o, Suppressor() as s: + raise ValueError('suppressed') + + # Test 3: __enter__ failure unwinds already-entered managers + class CM1: + def __enter__(self): return self + def __exit__(self, *a): + print('CM1 exited') + return False + + class CM2: + def __enter__(self): raise RuntimeError('enter failed') + def __exit__(self, *a): + print('CM2 exited (should not print)') + + try: + with CM1() as a, CM2() as b: + print('body (should not print)') + except RuntimeError as e: + print('caught:', e) 
+ " +expect: + stdout: |+ + exc_type is class: True + outer exc_type after suppress: None + CM1 exited + caught: enter failed + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/lambdas/lambda_basic.yaml b/tests/scenarios/cmd/python/lambdas/lambda_basic.yaml new file mode 100644 index 00000000..b0acdd23 --- /dev/null +++ b/tests/scenarios/cmd/python/lambdas/lambda_basic.yaml @@ -0,0 +1,21 @@ +description: python lambda creates anonymous single-expression functions. +skip_assert_against_bash: true +setup: + files: + - path: lambda.py + content: |+ + square = lambda x: x * x + add = lambda a, b: a + b + print(square(5)) + print(add(3, 4)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python lambda.py +expect: + stdout: |+ + 25 + 7 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/lambdas/lambda_map.yaml b/tests/scenarios/cmd/python/lambdas/lambda_map.yaml new file mode 100644 index 00000000..8c6dc976 --- /dev/null +++ b/tests/scenarios/cmd/python/lambdas/lambda_map.yaml @@ -0,0 +1,10 @@ +description: python lambda used with map() transforms each element of a list. +skip_assert_against_bash: true +input: + script: |+ + python -c "nums = [1, 2, 3, 4, 5]; doubled = list(map(lambda x: x * 2, nums)); print(doubled)" +expect: + stdout: |+ + [2, 4, 6, 8, 10] + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/lambdas/lambda_sorted.yaml b/tests/scenarios/cmd/python/lambdas/lambda_sorted.yaml new file mode 100644 index 00000000..cd6cf3e8 --- /dev/null +++ b/tests/scenarios/cmd/python/lambdas/lambda_sorted.yaml @@ -0,0 +1,23 @@ +description: python lambda used as key argument to sorted() orders by computed value. 
+skip_assert_against_bash: true +setup: + files: + - path: lambdasort.py + content: |+ + words = ["banana", "apple", "cherry", "date"] + sorted_words = sorted(words, key=lambda w: len(w)) + for w in sorted_words: + print(w) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python lambdasort.py +expect: + stdout: |+ + date + apple + banana + cherry + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/operators/augmented_assignment.yaml b/tests/scenarios/cmd/python/operators/augmented_assignment.yaml new file mode 100644 index 00000000..9662e186 --- /dev/null +++ b/tests/scenarios/cmd/python/operators/augmented_assignment.yaml @@ -0,0 +1,34 @@ +description: python augmented assignment operators update variables in-place. +skip_assert_against_bash: true +setup: + files: + - path: augmented.py + content: |+ + x = 10 + x += 5 + print(x) + x -= 3 + print(x) + x *= 2 + print(x) + x //= 3 + print(x) + x **= 2 + print(x) + x %= 7 + print(x) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python augmented.py +expect: + stdout: |+ + 15 + 12 + 24 + 8 + 64 + 1 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/operators/bigint_floordiv_mod.yaml b/tests/scenarios/cmd/python/operators/bigint_floordiv_mod.yaml new file mode 100644 index 00000000..1b37b27f --- /dev/null +++ b/tests/scenarios/cmd/python/operators/bigint_floordiv_mod.yaml @@ -0,0 +1,56 @@ +skip_assert_against_bash: true +tests: + - name: floor division of big integers + input: + script: |+ + python -c " + x = 10 ** 40 + print(x // 3) + print(x // -3) + print(-x // 3) + " + expect: + stdout: |+ + 3333333333333333333333333333333333333333 + -3333333333333333333333333333333333333334 + -3333333333333333333333333333333333333334 + + - name: modulo of big integers + input: + script: |+ + python -c " + x = 10 ** 40 + print(x % 7) + print(x % -7) + print(-x % 7) + " + expect: + stdout: |+ + 4 + -3 + 3 + + - name: floor division by big integer divisor + input: + 
script: |+ + python -c " + x = 10 ** 40 + print(x // (10 ** 20)) + " + expect: + stdout: |+ + 10000000000000000000 + + - name: floor division by zero raises ZeroDivisionError + input: + script: |+ + python -c " + x = 10 ** 40 + try: + print(x // 0) + except ZeroDivisionError as e: + print('ZeroDivisionError:', e) + " + expect: + stdout: |+ + ZeroDivisionError: integer division or modulo by zero diff --git a/tests/scenarios/cmd/python/operators/bitwise.yaml b/tests/scenarios/cmd/python/operators/bitwise.yaml new file mode 100644 index 00000000..74f361ed --- /dev/null +++ b/tests/scenarios/cmd/python/operators/bitwise.yaml @@ -0,0 +1,29 @@ +description: python bitwise operators AND, OR, XOR, NOT, left shift, and right shift work on integers. +skip_assert_against_bash: true +setup: + files: + - path: bitwise.py + content: |+ + a = 0b1010 + b = 0b1100 + print(bin(a & b)) + print(bin(a | b)) + print(bin(a ^ b)) + print(bin(~a & 0xFF)) + print(bin(a << 2)) + print(bin(b >> 1)) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python bitwise.py +expect: + stdout: |+ + 0b1000 + 0b1110 + 0b110 + 0b11110101 + 0b101000 + 0b110 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/operators/boolean_short_circuit.yaml b/tests/scenarios/cmd/python/operators/boolean_short_circuit.yaml new file mode 100644 index 00000000..9e06571c --- /dev/null +++ b/tests/scenarios/cmd/python/operators/boolean_short_circuit.yaml @@ -0,0 +1,33 @@ +description: python and/or operators short-circuit evaluation and return the determining operand. 
+skip_assert_against_bash: true +setup: + files: + - path: shortcircuit.py + content: |+ + def side_effect(val, msg): + print(msg) + return val + + print(False and side_effect(True, "should not print")) + print(True or side_effect(True, "should not print")) + print(True and side_effect(True, "and executed")) + print(False or side_effect(False, "or executed")) + print(not True) + print(not False) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python shortcircuit.py +expect: + stdout: |+ + False + True + and executed + True + or executed + False + False + True + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/operators/chained_comparisons.yaml b/tests/scenarios/cmd/python/operators/chained_comparisons.yaml new file mode 100644 index 00000000..841bc1de --- /dev/null +++ b/tests/scenarios/cmd/python/operators/chained_comparisons.yaml @@ -0,0 +1,26 @@ +description: python chained comparisons evaluate multiple comparisons without repeating operands. +skip_assert_against_bash: true +setup: + files: + - path: chained.py + content: |+ + x = 5 + print(1 < x < 10) + print(1 < x < 4) + print(0 <= x <= 5) + print(1 < 2 < 3 < 4) + print(1 == 1 == 2) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python chained.py +expect: + stdout: |+ + True + False + True + True + False + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/operators/mul_memory_limit.yaml b/tests/scenarios/cmd/python/operators/mul_memory_limit.yaml new file mode 100644 index 00000000..754900d7 --- /dev/null +++ b/tests/scenarios/cmd/python/operators/mul_memory_limit.yaml @@ -0,0 +1,38 @@ +description: python string/bytes/list/tuple repetition raises MemoryError when the result would exceed 1 MiB. 
+skip_assert_against_bash: true +input: + script: |+ + python -c " + import sys + try: + x = 'a' * (2**62) + print('no error') + except MemoryError: + print('MemoryError for str') + + try: + x = b'a' * (2**62) + print('no error') + except MemoryError: + print('MemoryError for bytes') + + try: + x = [1] * (2**62) + print('no error') + except MemoryError: + print('MemoryError for list') + + try: + x = (1,) * (2**62) + print('no error') + except MemoryError: + print('MemoryError for tuple') + " +expect: + stdout: |+ + MemoryError for str + MemoryError for bytes + MemoryError for list + MemoryError for tuple + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/operators/pow_exponent_limit.yaml b/tests/scenarios/cmd/python/operators/pow_exponent_limit.yaml new file mode 100644 index 00000000..1d3d8d0b --- /dev/null +++ b/tests/scenarios/cmd/python/operators/pow_exponent_limit.yaml @@ -0,0 +1,29 @@ +description: python ** operator and pow() builtin raise OverflowError when the exponent is too large to be computed safely. +skip_assert_against_bash: true +input: + script: |+ + python -c " + try: + x = 2 ** 1000000000 + print('no error') + except OverflowError: + print('OverflowError for 2**1000000000') + + try: + x = pow(2, 1000000000) + print('no error') + except OverflowError: + print('OverflowError for pow(2, 1000000000)') + + # Small exponents should still work + print(2 ** 10) + print(pow(3, 5)) + " +expect: + stdout: |+ + OverflowError for 2**1000000000 + OverflowError for pow(2, 1000000000) + 1024 + 243 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/operators/pow_negative_exp_bigint.yaml b/tests/scenarios/cmd/python/operators/pow_negative_exp_bigint.yaml new file mode 100644 index 00000000..53289cd0 --- /dev/null +++ b/tests/scenarios/cmd/python/operators/pow_negative_exp_bigint.yaml @@ -0,0 +1,18 @@ +description: python ** operator with negative exponent and big-int base returns correct float (not +Inf). 
+skip_assert_against_bash: true +input: + script: |+ + python -c " + # (2**80)**-1 should be a small positive float, not +Inf + result = (2**80)**-1 + print(result > 0) + print(result < 1) + print(result == float('inf')) + " +expect: + stdout: |+ + True + True + False + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/operators/ternary.yaml b/tests/scenarios/cmd/python/operators/ternary.yaml new file mode 100644 index 00000000..aef685a8 --- /dev/null +++ b/tests/scenarios/cmd/python/operators/ternary.yaml @@ -0,0 +1,31 @@ +description: python ternary (conditional) expression selects one of two values based on a condition. +skip_assert_against_bash: true +setup: + files: + - path: ternary.py + content: |+ + x = 10 + result = "positive" if x > 0 else "non-positive" + print(result) + + def abs_val(n): + return n if n >= 0 else -n + + print(abs_val(-5)) + print(abs_val(3)) + + grade = "pass" if 60 <= 75 <= 100 else "fail" + print(grade) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python ternary.py +expect: + stdout: |+ + positive + 5 + 3 + pass + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/os_module/os_environ.yaml b/tests/scenarios/cmd/python/os_module/os_environ.yaml new file mode 100644 index 00000000..1baf386b --- /dev/null +++ b/tests/scenarios/cmd/python/os_module/os_environ.yaml @@ -0,0 +1,29 @@ +description: python os.environ is an empty dict — host environment variables are not accessible. 
+skip_assert_against_bash: true +setup: + files: + - path: environ.py + content: |+ + import os + env = os.environ + print(hasattr(env, 'get')) + # Non-existent key returns the default, not a host env var + val = env.get("NONEXISTENT_VAR_12345", "default_value") + print(val) + # PATH is always set in any real OS environment, but must be invisible here + path_val = env.get("PATH", "not_visible") + print(path_val) + print(len(env) == 0) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python environ.py +expect: + stdout: |+ + True + default_value + not_visible + True + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/os_module/os_getcwd.yaml b/tests/scenarios/cmd/python/os_module/os_getcwd.yaml new file mode 100644 index 00000000..d577963a --- /dev/null +++ b/tests/scenarios/cmd/python/os_module/os_getcwd.yaml @@ -0,0 +1,22 @@ +description: python os.getcwd() is not available (blocked). +skip_assert_against_bash: true +setup: + files: + - path: getcwd.py + content: |+ + import os + try: + os.getcwd() + print("not blocked") + except AttributeError: + print("blocked") + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python getcwd.py +expect: + stdout: |+ + blocked + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/os_module/os_name_linesep.yaml b/tests/scenarios/cmd/python/os_module/os_name_linesep.yaml new file mode 100644 index 00000000..26db95ba --- /dev/null +++ b/tests/scenarios/cmd/python/os_module/os_name_linesep.yaml @@ -0,0 +1,24 @@ +description: os.name and os.linesep reflect the host OS on all platforms. 
+skip_assert_against_bash: true +tests: + - name: os.name is posix on Unix + input: + script: |+ + python -c "import os; print(os.name)" + expect: + stdout: |+ + posix + stdout_windows: |+ + nt + exit_code: 0 + + - name: os.linesep is LF on Unix + input: + script: |+ + python -c "import os; print(repr(os.linesep))" + expect: + stdout: |+ + '\n' + stdout_windows: |+ + '\r\n' + exit_code: 0 diff --git a/tests/scenarios/cmd/python/os_module/os_path_abspath_blocked.yaml b/tests/scenarios/cmd/python/os_module/os_path_abspath_blocked.yaml new file mode 100644 index 00000000..bc5da55e --- /dev/null +++ b/tests/scenarios/cmd/python/os_module/os_path_abspath_blocked.yaml @@ -0,0 +1,10 @@ +description: python os.path.abspath() is not available (blocked — leaks host CWD via os.Getwd). +skip_assert_against_bash: true +input: + script: |+ + python -c "import os; print(hasattr(os.path, 'abspath'))" +expect: + stdout: |+ + False + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/os_module/os_path_realpath_blocked.yaml b/tests/scenarios/cmd/python/os_module/os_path_realpath_blocked.yaml new file mode 100644 index 00000000..251c6d24 --- /dev/null +++ b/tests/scenarios/cmd/python/os_module/os_path_realpath_blocked.yaml @@ -0,0 +1,10 @@ +description: python os.path.realpath() is not available (blocked — leaks host CWD via os.Getwd). +skip_assert_against_bash: true +input: + script: |+ + python -c "import os; print(hasattr(os.path, 'realpath'))" +expect: + stdout: |+ + False + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/sandbox/asyncio_blocked.yaml b/tests/scenarios/cmd/python/sandbox/asyncio_blocked.yaml new file mode 100644 index 00000000..54e9f5c1 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/asyncio_blocked.yaml @@ -0,0 +1,15 @@ +description: asyncio module is blocked and raises ImportError when imported, so try/except ImportError works correctly. 
+skip_assert_against_bash: true +input: + script: |+ + python -c " + try: + import asyncio + print('imported') + except ImportError: + print('ImportError ok') + " +expect: + stdout: |+ + ImportError ok + exit_code: 0 diff --git a/tests/scenarios/cmd/python/sandbox/closed_file_io.yaml b/tests/scenarios/cmd/python/sandbox/closed_file_io.yaml new file mode 100644 index 00000000..b86cd590 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/closed_file_io.yaml @@ -0,0 +1,15 @@ +description: I/O operations on a closed file object raise ValueError. +skip_assert_against_bash: true +setup: + files: + - path: test.txt + content: "hello\n" + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python -c "f = open('test.txt'); f.close(); f.read()" +expect: + stdout: |+ + stderr_contains: ["ValueError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/ctypes_blocked.yaml b/tests/scenarios/cmd/python/sandbox/ctypes_blocked.yaml new file mode 100644 index 00000000..1b991b11 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/ctypes_blocked.yaml @@ -0,0 +1,15 @@ +description: ctypes module is blocked and raises ImportError when imported, so try/except ImportError works correctly. +skip_assert_against_bash: true +input: + script: |+ + python -c " + try: + import ctypes + print('imported') + except ImportError: + print('ImportError ok') + " +expect: + stdout: |+ + ImportError ok + exit_code: 0 diff --git a/tests/scenarios/cmd/python/sandbox/glob_blocked.yaml b/tests/scenarios/cmd/python/sandbox/glob_blocked.yaml new file mode 100644 index 00000000..54f954e9 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/glob_blocked.yaml @@ -0,0 +1,9 @@ +description: glob module is blocked and raises ImportError when imported. 
+skip_assert_against_bash: true +input: + script: |+ + python -c "import glob; glob.glob('*')" +expect: + stdout: |+ + stderr_contains: ["ImportError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/multiprocessing_blocked.yaml b/tests/scenarios/cmd/python/sandbox/multiprocessing_blocked.yaml new file mode 100644 index 00000000..53a5d116 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/multiprocessing_blocked.yaml @@ -0,0 +1,15 @@ +description: multiprocessing module is blocked and raises ImportError when imported, so try/except ImportError works correctly. +skip_assert_against_bash: true +input: + script: |+ + python -c " + try: + import multiprocessing + print('imported') + except ImportError: + print('ImportError ok') + " +expect: + stdout: |+ + ImportError ok + exit_code: 0 diff --git a/tests/scenarios/cmd/python/sandbox/open_append_blocked.yaml b/tests/scenarios/cmd/python/sandbox/open_append_blocked.yaml new file mode 100644 index 00000000..d1733d60 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/open_append_blocked.yaml @@ -0,0 +1,9 @@ +description: open() in append mode raises PermissionError. +skip_assert_against_bash: true +input: + script: |+ + python -c "open('/tmp/evil.txt', 'a')" +expect: + stdout: |+ + stderr_contains: ["PermissionError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/open_exclusive_blocked.yaml b/tests/scenarios/cmd/python/sandbox/open_exclusive_blocked.yaml new file mode 100644 index 00000000..483b916d --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/open_exclusive_blocked.yaml @@ -0,0 +1,9 @@ +description: open() in exclusive creation mode ('x') raises PermissionError. 
+skip_assert_against_bash: true +input: + script: |+ + python -c "open('/tmp/new.txt', 'x')" +expect: + stdout: |+ + stderr_contains: ["PermissionError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/open_outside_allowed_paths.yaml b/tests/scenarios/cmd/python/sandbox/open_outside_allowed_paths.yaml new file mode 100644 index 00000000..89f8ee91 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/open_outside_allowed_paths.yaml @@ -0,0 +1,15 @@ +description: open() cannot read files outside allowed paths and raises OSError. +skip_assert_against_bash: true +setup: + files: + - path: allowed.txt + content: "ok\n" + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python -c "open('/etc/passwd')" +expect: + stdout: |+ + stderr_contains: ["OSError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/open_readwrite_blocked.yaml b/tests/scenarios/cmd/python/sandbox/open_readwrite_blocked.yaml new file mode 100644 index 00000000..42c4beee --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/open_readwrite_blocked.yaml @@ -0,0 +1,9 @@ +description: open() in read-write mode ('r+') raises PermissionError. +skip_assert_against_bash: true +input: + script: |+ + python -c "open('/tmp/test.txt', 'r+')" +expect: + stdout: |+ + stderr_contains: ["PermissionError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/open_write_blocked.yaml b/tests/scenarios/cmd/python/sandbox/open_write_blocked.yaml new file mode 100644 index 00000000..6e1f679c --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/open_write_blocked.yaml @@ -0,0 +1,9 @@ +description: open() in write mode raises PermissionError. 
+skip_assert_against_bash: true +input: + script: |+ + python -c "open('/tmp/evil.txt', 'w')" +expect: + stdout: |+ + stderr_contains: ["PermissionError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/os_chmod_blocked.yaml b/tests/scenarios/cmd/python/sandbox/os_chmod_blocked.yaml new file mode 100644 index 00000000..a99e6127 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/os_chmod_blocked.yaml @@ -0,0 +1,9 @@ +description: os.chmod() is blocked and raises AttributeError. +skip_assert_against_bash: true +input: + script: |+ + python -c "import os; os.chmod('/tmp/test', 0o755)" +expect: + stdout: |+ + stderr_contains: ["AttributeError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/os_kill_blocked.yaml b/tests/scenarios/cmd/python/sandbox/os_kill_blocked.yaml new file mode 100644 index 00000000..7399039e --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/os_kill_blocked.yaml @@ -0,0 +1,9 @@ +description: os.kill() is blocked and raises AttributeError. +skip_assert_against_bash: true +input: + script: |+ + python -c "import os; os.kill(1, 9)" +expect: + stdout: |+ + stderr_contains: ["AttributeError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/os_listdir_outside_allowed_paths.yaml b/tests/scenarios/cmd/python/sandbox/os_listdir_outside_allowed_paths.yaml new file mode 100644 index 00000000..3799491d --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/os_listdir_outside_allowed_paths.yaml @@ -0,0 +1,15 @@ +description: os.listdir() cannot list directories outside allowed paths and raises OSError. 
+skip_assert_against_bash: true +setup: + files: + - path: subdir/file.txt + content: "ok\n" + chmod: 0644 +input: + allowed_paths: ["$DIR/subdir"] + script: |+ + python -c "import os; os.listdir('.')" +expect: + stdout: |+ + stderr_contains: ["OSError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/os_mkdir_blocked.yaml b/tests/scenarios/cmd/python/sandbox/os_mkdir_blocked.yaml new file mode 100644 index 00000000..dfb0a4d4 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/os_mkdir_blocked.yaml @@ -0,0 +1,9 @@ +description: os.mkdir() is blocked and raises AttributeError. +skip_assert_against_bash: true +input: + script: |+ + python -c "import os; os.mkdir('/tmp/testdir')" +expect: + stdout: |+ + stderr_contains: ["AttributeError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/os_putenv_blocked.yaml b/tests/scenarios/cmd/python/sandbox/os_putenv_blocked.yaml new file mode 100644 index 00000000..a7f7a87a --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/os_putenv_blocked.yaml @@ -0,0 +1,9 @@ +description: os.putenv() is blocked and raises AttributeError. +skip_assert_against_bash: true +input: + script: |+ + python -c "import os; os.putenv('KEY', 'val')" +expect: + stdout: |+ + stderr_contains: ["AttributeError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/os_remove_blocked.yaml b/tests/scenarios/cmd/python/sandbox/os_remove_blocked.yaml new file mode 100644 index 00000000..fe6c3c73 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/os_remove_blocked.yaml @@ -0,0 +1,9 @@ +description: os.remove() is blocked and raises AttributeError. 
+skip_assert_against_bash: true +input: + script: |+ + python -c "import os; os.remove('/tmp/anything')" +expect: + stdout: |+ + stderr_contains: ["AttributeError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/os_rename_blocked.yaml b/tests/scenarios/cmd/python/sandbox/os_rename_blocked.yaml new file mode 100644 index 00000000..4c34b507 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/os_rename_blocked.yaml @@ -0,0 +1,9 @@ +description: os.rename() is blocked and raises AttributeError. +skip_assert_against_bash: true +input: + script: |+ + python -c "import os; os.rename('/tmp/a', '/tmp/b')" +expect: + stdout: |+ + stderr_contains: ["AttributeError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/os_symlink_blocked.yaml b/tests/scenarios/cmd/python/sandbox/os_symlink_blocked.yaml new file mode 100644 index 00000000..38f174bc --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/os_symlink_blocked.yaml @@ -0,0 +1,9 @@ +description: os.symlink() is blocked and raises AttributeError. +skip_assert_against_bash: true +input: + script: |+ + python -c "import os; os.symlink('/tmp/src', '/tmp/dst')" +expect: + stdout: |+ + stderr_contains: ["AttributeError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/os_system_blocked.yaml b/tests/scenarios/cmd/python/sandbox/os_system_blocked.yaml new file mode 100644 index 00000000..05f203fe --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/os_system_blocked.yaml @@ -0,0 +1,9 @@ +description: os.system() is blocked and raises AttributeError. 
+skip_assert_against_bash: true +input: + script: |+ + python -c "import os; os.system('id')" +expect: + stdout: |+ + stderr_contains: ["AttributeError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/os_unlink_blocked.yaml b/tests/scenarios/cmd/python/sandbox/os_unlink_blocked.yaml new file mode 100644 index 00000000..9a7c4afa --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/os_unlink_blocked.yaml @@ -0,0 +1,9 @@ +description: os.unlink() is blocked and raises AttributeError. +skip_assert_against_bash: true +input: + script: |+ + python -c "import os; os.unlink('/tmp/test')" +expect: + stdout: |+ + stderr_contains: ["AttributeError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/re_blocked.yaml b/tests/scenarios/cmd/python/sandbox/re_blocked.yaml new file mode 100644 index 00000000..e3e389e3 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/re_blocked.yaml @@ -0,0 +1,15 @@ +description: re module is blocked and raises ImportError when imported, so try/except ImportError works correctly. +skip_assert_against_bash: true +input: + script: |+ + python -c " + try: + import re + print('imported') + except ImportError: + print('ImportError ok') + " +expect: + stdout: |+ + ImportError ok + exit_code: 0 diff --git a/tests/scenarios/cmd/python/sandbox/socket_blocked.yaml b/tests/scenarios/cmd/python/sandbox/socket_blocked.yaml new file mode 100644 index 00000000..60c17043 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/socket_blocked.yaml @@ -0,0 +1,15 @@ +description: socket module is blocked and raises ImportError when imported, so try/except ImportError works correctly. 
+skip_assert_against_bash: true +input: + script: |+ + python -c " + try: + import socket + print('imported') + except ImportError: + print('ImportError ok') + " +expect: + stdout: |+ + ImportError ok + exit_code: 0 diff --git a/tests/scenarios/cmd/python/sandbox/subprocess_blocked.yaml b/tests/scenarios/cmd/python/sandbox/subprocess_blocked.yaml new file mode 100644 index 00000000..0b23f3d9 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/subprocess_blocked.yaml @@ -0,0 +1,15 @@ +description: subprocess module is blocked and raises ImportError when imported, so try/except ImportError works correctly. +skip_assert_against_bash: true +input: + script: |+ + python -c " + try: + import subprocess + print('imported') + except ImportError: + print('ImportError ok') + " +expect: + stdout: |+ + ImportError ok + exit_code: 0 diff --git a/tests/scenarios/cmd/python/sandbox/tempfile_blocked.yaml b/tests/scenarios/cmd/python/sandbox/tempfile_blocked.yaml new file mode 100644 index 00000000..46d1b8ef --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/tempfile_blocked.yaml @@ -0,0 +1,9 @@ +description: tempfile module is blocked and raises ImportError when imported. +skip_assert_against_bash: true +input: + script: |+ + python -c "import tempfile; tempfile.mkstemp()" +expect: + stdout: |+ + stderr_contains: ["ImportError"] + exit_code: 1 diff --git a/tests/scenarios/cmd/python/sandbox/threading_blocked.yaml b/tests/scenarios/cmd/python/sandbox/threading_blocked.yaml new file mode 100644 index 00000000..698b4069 --- /dev/null +++ b/tests/scenarios/cmd/python/sandbox/threading_blocked.yaml @@ -0,0 +1,15 @@ +description: threading module is blocked and raises ImportError when imported, so try/except ImportError works correctly. 
+skip_assert_against_bash: true +input: + script: |+ + python -c " + try: + import threading + print('imported') + except ImportError: + print('ImportError ok') + " +expect: + stdout: |+ + ImportError ok + exit_code: 0 diff --git a/tests/scenarios/cmd/python/shell_integration/exit_code_in_if.yaml b/tests/scenarios/cmd/python/shell_integration/exit_code_in_if.yaml new file mode 100644 index 00000000..b4e99cdd --- /dev/null +++ b/tests/scenarios/cmd/python/shell_integration/exit_code_in_if.yaml @@ -0,0 +1,14 @@ +description: python exit code can be used to branch in a shell if statement. +skip_assert_against_bash: true +input: + script: |+ + if python -c "import sys; sys.exit(1)"; then + echo "success" + else + echo "failure" + fi +expect: + stdout: |+ + failure + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/shell_integration/pipe_output.yaml b/tests/scenarios/cmd/python/shell_integration/pipe_output.yaml new file mode 100644 index 00000000..682f2c1b --- /dev/null +++ b/tests/scenarios/cmd/python/shell_integration/pipe_output.yaml @@ -0,0 +1,10 @@ +description: python output can be piped as stdin to another python invocation. +skip_assert_against_bash: true +input: + script: |+ + python -c "print('hello world')" | python -c "import sys; data = sys.stdin.read().strip(); print('got: ' + data)" +expect: + stdout: |+ + got: hello world + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/shell_integration/script_with_argv.yaml b/tests/scenarios/cmd/python/shell_integration/script_with_argv.yaml new file mode 100644 index 00000000..3fd25bad --- /dev/null +++ b/tests/scenarios/cmd/python/shell_integration/script_with_argv.yaml @@ -0,0 +1,21 @@ +description: positional arguments after the script name are passed as sys.argv[1:]. 
+skip_assert_against_bash: true +setup: + files: + - path: args.py + content: |+ + import sys + for arg in sys.argv[1:]: + print(arg) + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + python args.py alpha beta gamma +expect: + stdout: |+ + alpha + beta + gamma + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/shell_integration/sys_exit_256.yaml b/tests/scenarios/cmd/python/shell_integration/sys_exit_256.yaml new file mode 100644 index 00000000..135ed062 --- /dev/null +++ b/tests/scenarios/cmd/python/shell_integration/sys_exit_256.yaml @@ -0,0 +1,11 @@ +description: sys.exit(N) with N > 255 truncates to uint8 (POSIX exit code mod 256). +skip_assert_against_bash: true +input: + script: |+ + python -c "import sys; sys.exit(256)" + echo "exit=$?" +expect: + stdout: |+ + exit=0 + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/stdin/no_args_reads_stdin.yaml b/tests/scenarios/cmd/python/stdin/no_args_reads_stdin.yaml new file mode 100644 index 00000000..62862fe8 --- /dev/null +++ b/tests/scenarios/cmd/python/stdin/no_args_reads_stdin.yaml @@ -0,0 +1,10 @@ +description: python without any arguments reads Python source from stdin. +skip_assert_against_bash: true +input: + script: |+ + echo "print('no dash')" | python +expect: + stdout: |+ + no dash + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/stdin/read_from_stdin.yaml b/tests/scenarios/cmd/python/stdin/read_from_stdin.yaml new file mode 100644 index 00000000..c5a173f7 --- /dev/null +++ b/tests/scenarios/cmd/python/stdin/read_from_stdin.yaml @@ -0,0 +1,10 @@ +description: python reads source from stdin when invoked with '-'. 
+skip_assert_against_bash: true +input: + script: |+ + echo "print('from stdin')" | python - +expect: + stdout: |+ + from stdin + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/stdin/read_large_n_is_capped.yaml b/tests/scenarios/cmd/python/stdin/read_large_n_is_capped.yaml new file mode 100644 index 00000000..f6808905 --- /dev/null +++ b/tests/scenarios/cmd/python/stdin/read_large_n_is_capped.yaml @@ -0,0 +1,15 @@ +description: sys.stdin.read(n) with a very large n is capped at maxFileReadBytes to prevent OOM. +skip_assert_against_bash: true +tests: + - name: read with large n returns available bytes without OOM + input: + script: |+ + echo "hello" | python -c " + import sys + data = sys.stdin.read(1 << 30) + print(repr(data)) + " + expect: + stdout: |+ + 'hello\n' + exit_code: 0 diff --git a/tests/scenarios/cmd/python/stdin/sys_stdin_read.yaml b/tests/scenarios/cmd/python/stdin/sys_stdin_read.yaml new file mode 100644 index 00000000..82ce8f82 --- /dev/null +++ b/tests/scenarios/cmd/python/stdin/sys_stdin_read.yaml @@ -0,0 +1,10 @@ +description: Python code can read all of stdin via sys.stdin.read(). +skip_assert_against_bash: true +input: + script: |+ + echo "hello" | python -c "import sys; data = sys.stdin.read(); print(data.strip())" +expect: + stdout: |+ + hello + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/python/stdin/sys_stdin_readline.yaml b/tests/scenarios/cmd/python/stdin/sys_stdin_readline.yaml new file mode 100644 index 00000000..06ca63b9 --- /dev/null +++ b/tests/scenarios/cmd/python/stdin/sys_stdin_readline.yaml @@ -0,0 +1,10 @@ +description: Python code can read a single line from sys.stdin using readline(). 
+skip_assert_against_bash: true +input: + script: |+ + printf "first\nsecond\n" | python -c "import sys; line = sys.stdin.readline(); print(line.strip())" +expect: + stdout: |+ + first + stderr: |+ + exit_code: 0 diff --git a/tests/scenarios/cmd/unknown_cmd/common_progs/python.yaml b/tests/scenarios/cmd/unknown_cmd/common_progs/python.yaml index e3f386ae..9d4d0ce5 100644 --- a/tests/scenarios/cmd/unknown_cmd/common_progs/python.yaml +++ b/tests/scenarios/cmd/unknown_cmd/common_progs/python.yaml @@ -1,11 +1,11 @@ -# skip: rshell reports different error format than bash for unavailable commands +# python is now a builtin in rshell, so this scenario checks that it runs instead of being rejected as unknown. skip_assert_against_bash: true -description: The python command is not a builtin and is rejected as unknown. +description: The python command is a builtin that executes Python 3 code. input: script: |+ python -c "print('hello')" expect: - stdout: "" + stdout: |+ + hello stderr: |+ - python: command not found - exit_code: 127 + exit_code: 0