From 7ec7ecc09c793a032bf7990970a3029c85141a1c Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 21 Dec 2025 06:29:51 +0000 Subject: [PATCH] Implement chr and ord builtins in eldritch-core - Added `chr(i)` to convert integer code points to characters. - Added `ord(c)` to convert characters or single-byte bytes to integer code points. - Registered new builtins in `interpreter/builtins/mod.rs`. - Added comprehensive unit tests in `tests/builtins_chr_ord.rs`. --- .../src/interpreter/builtins/chr.rs | 37 ++++++++ .../src/interpreter/builtins/mod.rs | 4 + .../src/interpreter/builtins/ord.rs | 47 ++++++++++ .../eldritch-core/tests/builtins_chr_ord.rs | 86 +++++++++++++++++++ 4 files changed, 174 insertions(+) create mode 100644 implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/chr.rs create mode 100644 implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/ord.rs create mode 100644 implants/lib/eldritchv2/eldritch-core/tests/builtins_chr_ord.rs diff --git a/implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/chr.rs b/implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/chr.rs new file mode 100644 index 000000000..f300d90ab --- /dev/null +++ b/implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/chr.rs @@ -0,0 +1,37 @@ +use crate::ast::{Environment, Value}; +use crate::interpreter::introspection::get_type_name; +use alloc::format; +use alloc::string::String; +use alloc::sync::Arc; +use core::char; +use spin::RwLock; + +/// `chr(i)`: Return the string representing a character whose Unicode code point is the integer `i`. +/// +/// **Parameters** +/// - `i` (Int): The integer code point. 
+pub fn builtin_chr(_env: &Arc>, args: &[Value]) -> Result { + if args.len() != 1 { + return Err(format!( + "chr() takes exactly one argument ({} given)", + args.len() + )); + } + match &args[0] { + Value::Int(i) => { + // Valid range for char is roughly 0 to 0x10FFFF + // Rust char::from_u32 checks this. + if *i < 0 || *i > 0x10FFFF { + return Err(format!("chr() arg not in range(0x110000)")); + } + match char::from_u32(*i as u32) { + Some(c) => Ok(Value::String(String::from(c))), + None => Err(format!("chr() arg not in range(0x110000)")), + } + } + _ => Err(format!( + "TypeError: an integer is required (got type {})", + get_type_name(&args[0]) + )), + } +} diff --git a/implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/mod.rs b/implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/mod.rs index 6b5a37024..6670b6761 100644 --- a/implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/mod.rs +++ b/implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/mod.rs @@ -8,6 +8,7 @@ mod assert_eq; mod bool; mod builtins_fn; mod bytes; +mod chr; mod dir; mod enumerate; mod eprint; @@ -15,6 +16,7 @@ mod fail; mod int; mod len; mod libs; +mod ord; mod pprint; mod print; mod range; @@ -58,6 +60,8 @@ pub fn get_all_builtins() -> Vec<(&'static str, BuiltinFn)> { ("libs", libs::builtin_libs as BuiltinFn), ("builtins", builtins_fn::builtin_builtins as BuiltinFn), ("bytes", bytes::builtin_bytes as BuiltinFn), + ("chr", chr::builtin_chr as BuiltinFn), + ("ord", ord::builtin_ord as BuiltinFn), // New ("abs", abs::builtin_abs as BuiltinFn), ("any", any::builtin_any as BuiltinFn), diff --git a/implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/ord.rs b/implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/ord.rs new file mode 100644 index 000000000..7120c38ec --- /dev/null +++ b/implants/lib/eldritchv2/eldritch-core/src/interpreter/builtins/ord.rs @@ -0,0 +1,47 @@ +use crate::ast::{Environment, Value}; +use 
crate::interpreter::introspection::get_type_name; +use alloc::format; +use alloc::string::String; +use alloc::sync::Arc; +use spin::RwLock; + +/// `ord(c)`: Return the integer that represents the Unicode code point of the character `c`. +/// +/// **Parameters** +/// - `c` (String | Bytes): A string of length 1 or bytes of length 1. +pub fn builtin_ord(_env: &Arc>, args: &[Value]) -> Result { + if args.len() != 1 { + return Err(format!( + "ord() takes exactly one argument ({} given)", + args.len() + )); + } + match &args[0] { + Value::String(s) => { + let mut chars = s.chars(); + if let Some(c) = chars.next() { + if chars.next().is_none() { + return Ok(Value::Int(c as i64)); + } + } + Err(format!( + "ord() expected string of length 1, but string '{}' found", + s + )) + } + Value::Bytes(b) => { + if b.len() == 1 { + Ok(Value::Int(b[0] as i64)) + } else { + Err(format!( + "ord() expected bytes of length 1, but bytes of length {} found", + b.len() + )) + } + } + _ => Err(format!( + "TypeError: ord() expected string of length 1, but {} found", + get_type_name(&args[0]) + )), + } +} diff --git a/implants/lib/eldritchv2/eldritch-core/tests/builtins_chr_ord.rs b/implants/lib/eldritchv2/eldritch-core/tests/builtins_chr_ord.rs new file mode 100644 index 000000000..3daaeeb45 --- /dev/null +++ b/implants/lib/eldritchv2/eldritch-core/tests/builtins_chr_ord.rs @@ -0,0 +1,86 @@ +extern crate alloc; +extern crate eldritch_core; + +#[cfg(test)] +mod tests { + use eldritch_core::Interpreter; + use eldritch_core::Value; + use alloc::string::String; + + #[test] + fn test_chr() { + let mut interp = Interpreter::new(); + + // Valid integers + let res = interp.interpret("chr(65)"); + assert_eq!(res.unwrap(), Value::String(String::from("A"))); + + let res = interp.interpret("chr(97)"); + assert_eq!(res.unwrap(), Value::String(String::from("a"))); + + let res = interp.interpret("chr(8364)"); + assert_eq!(res.unwrap(), Value::String(String::from("€"))); + + // Edge cases + let res = 
/// Checks `ord()` against inputs it must accept (with the expected integer)
/// and inputs it must reject (with a substring the error has to contain).
#[test]
fn test_ord() {
    let mut interp = Interpreter::new();

    // Single-character strings and single-byte bytes evaluate to an integer.
    let accepted = [
        ("ord('A')", 65),
        ("ord('a')", 97),
        ("ord('€')", 8364),
        ("ord(bytes([65]))", 65),
    ];
    for (source, expected) in accepted {
        let outcome = interp.interpret(source);
        assert_eq!(outcome.unwrap(), Value::Int(expected));
    }

    // Wrong lengths and wrong types fail with a recognisable message.
    let rejected = [
        ("ord('AB')", "expected string of length 1"),
        ("ord('')", "expected string of length 1"),
        ("ord(bytes([65, 66]))", "expected bytes of length 1"),
        ("ord(1)", "TypeError"),
    ];
    for (source, needle) in rejected {
        let outcome = interp.interpret(source);
        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().contains(needle));
    }
}