From 210f0c526c281d6f36c296043f04f7032e64a26b Mon Sep 17 00:00:00 2001 From: rossberg-chromium Date: Tue, 4 Apr 2017 19:38:03 +0200 Subject: [PATCH 1/3] Names are UTF-8 encoded --- interpreter/host/import.ml | 5 +- interpreter/host/import.mli | 4 +- interpreter/host/import/env.ml | 2 +- interpreter/host/import/spectest.ml | 2 +- interpreter/host/js.ml | 23 +++--- interpreter/host/main.ml | 4 +- interpreter/host/run.ml | 22 +++--- interpreter/spec/ast.ml | 7 +- interpreter/spec/decode.ml | 12 ++-- interpreter/spec/encode.ml | 7 +- interpreter/spec/instance.ml | 2 +- interpreter/spec/utf8.ml | 105 ++++++++++++++++++++++++++++ interpreter/spec/utf8.mli | 4 ++ interpreter/spec/valid.ml | 2 +- interpreter/text/arrange.ml | 15 ++-- interpreter/text/parser.mly | 20 ++++-- interpreter/text/script.ml | 6 +- interpreter/util/lib.ml | 52 ++++++++------ interpreter/util/lib.mli | 2 + interpreter/winmake.bat | 6 +- 20 files changed, 224 insertions(+), 78 deletions(-) create mode 100644 interpreter/spec/utf8.ml create mode 100644 interpreter/spec/utf8.mli diff --git a/interpreter/host/import.ml b/interpreter/host/import.ml index b1d2bfe0fb..5eb04929e8 100644 --- a/interpreter/host/import.ml +++ b/interpreter/host/import.ml @@ -5,7 +5,7 @@ open Types module Unknown = Error.Make () exception Unknown = Unknown.Error (* indicates unknown import name *) -module Registry = Map.Make(String) +module Registry = Instance.ExportMap let registry = ref Registry.empty let register name lookup = registry := Registry.add name lookup !registry @@ -22,6 +22,7 @@ let lookup (m : module_) (im : import) : Instance.extern = let t = external_type_of_import_kind m ikind in try Registry.find module_name !registry item_name t with Not_found -> Unknown.error im.at - ("unknown import \"" ^ module_name ^ "." ^ item_name ^ "\"") + ("unknown import \"" ^ String.escaped (Utf8.encode module_name) ^ + "\".\"" ^ String.escaped (Utf8.encode item_name) ^ "\"") let link m = List.map (lookup m) m.it.imports diff --git a/interpreter/host/import.mli b/interpreter/host/import.mli index 9310553df1..7d29b59207 100644 --- a/interpreter/host/import.mli +++ b/interpreter/host/import.mli @@ -3,6 +3,6 @@ exception Unknown of Source.region * string val link : Ast.module_ -> Instance.extern list (* raises Unknown *) val register : - string -> - (string -> Types.external_type -> Instance.extern (* raise Not_found *)) -> + Ast.name -> + (Ast.name -> Types.external_type -> Instance.extern (* raise Not_found *)) -> unit diff --git a/interpreter/host/import/env.ml b/interpreter/host/import/env.ml index 9414fd1794..07dab62f7e 100644 --- a/interpreter/host/import/env.ml +++ b/interpreter/host/import/env.ml @@ -40,7 +40,7 @@ let exit vs = let lookup name t = - match name, t with + match Utf8.encode name, t with | "abort", ExternalFuncType t -> ExternalFunc (HostFunc (t, abort)) | "exit", ExternalFuncType t -> ExternalFunc (HostFunc (t, exit)) | _ -> raise Not_found diff --git a/interpreter/host/import/spectest.ml b/interpreter/host/import/spectest.ml index de9537517d..a5e0762543 100644 --- a/interpreter/host/import/spectest.ml +++ b/interpreter/host/import/spectest.ml @@ -28,7 +28,7 @@ let print (FuncType (_, out)) vs = let lookup name t = - match name, t with + match Utf8.encode name, t with | "print", ExternalFuncType t -> ExternalFunc (HostFunc (t, print t)) | "print", _ -> let t = FuncType ([], []) in ExternalFunc (HostFunc (t, print t)) diff --git a/interpreter/host/js.ml b/interpreter/host/js.ml index 6a36b6a02a..69f9756fad 100644 --- a/interpreter/host/js.ml +++ b/interpreter/host/js.ml @@ -139,14 +139,15 @@ let harness = (* Context *) module Map = Map.Make(String) +module ExportMap = Instance.ExportMap -type exports = external_type Map.t +type exports = external_type ExportMap.t type modules = {mutable env : exports Map.t; mutable current : int} let exports m : exports = List.fold_left - (fun map exp -> Map.add exp.it.name (export_type m exp) map) - Map.empty m.it.exports + (fun map exp -> ExportMap.add exp.it.name (export_type m exp) map) + ExportMap.empty m.it.exports let modules () : modules = {env = Map.empty; current = 0} @@ -167,8 +168,9 @@ let lookup (mods : modules) x_opt name at = raise (Eval.Crash (at, if x_opt = None then "no module defined within script" else "unknown module " ^ of_var_opt mods x_opt ^ " within script")) - in try Map.find name exports with Not_found -> - raise (Eval.Crash (at, "unknown export \"" ^ name ^ "\" within module")) + in try ExportMap.find name exports with Not_found -> + raise (Eval.Crash (at, "unknown export \"" ^ + String.escaped (Utf8.encode name) ^ "\" within module")) (* Wrappers *) @@ -245,7 +247,7 @@ let wrap module_name item_name wrap_action wrap_assertion at = let types = FuncType ([], []) :: itypes in let imports = [{module_name; item_name; ikind} @@ at] in let ekind = FuncExport @@ at in - let exports = [{name = "run"; ekind; item} @@ at] in + let exports = [{name = Utf8.decode "run"; ekind; item} @@ at] in let body = [ Block ([], action @ assertion @ [Return @@ at]) @@ at; Unreachable @@ at ] @@ -286,6 +288,7 @@ let of_string_with add_char s = let of_bytes = of_string_with add_hex_char let of_string = of_string_with add_char +let of_name n = of_string (Utf8.encode n) let of_float z = match string_of_float z with @@ -311,14 +314,14 @@ let of_definition def = let of_wrapper mods x_opt name wrap_action wrap_assertion at = let x = of_var_opt mods x_opt in - let bs = wrap x name wrap_action wrap_assertion at in + let bs = wrap (Utf8.decode x) name wrap_action wrap_assertion at in "call(instance(" ^ of_bytes bs ^ ", " ^ "exports(" ^ of_string x ^ ", " ^ x ^ ")), " ^ " \"run\", [])" let of_action mods act = match act.it with | Invoke (x_opt, name, lits) -> - "call(" ^ of_var_opt mods x_opt ^ ", " ^ of_string name ^ ", " ^ + "call(" ^ of_var_opt mods x_opt ^ ", " ^ of_name name ^ ", " ^ "[" ^ String.concat ", " (List.map of_literal lits) ^ "])", (match lookup mods x_opt name act.at with | ExternalFuncType ft when not (is_js_func_type ft) -> @@ -327,7 +330,7 @@ let of_action mods act = | _ -> None ) | Get (x_opt, name) -> - "get(" ^ of_var_opt mods x_opt ^ ", " ^ of_string name ^ ")", + "get(" ^ of_var_opt mods x_opt ^ ", " ^ of_name name ^ ")", (match lookup mods x_opt name act.at with | ExternalGlobalType gt when not (is_js_global_type gt) -> let GlobalType (t, _) = gt in @@ -383,7 +386,7 @@ let of_command mods cmd = (if x_opt = None then "" else "let " ^ of_var_opt mods x_opt ^ " = " ^ current_var mods ^ ";\n") | Register (name, x_opt) -> - "register(" ^ of_string name ^ ", " ^ of_var_opt mods x_opt ^ ")\n" + "register(" ^ of_name name ^ ", " ^ of_var_opt mods x_opt ^ ")\n" | Action act -> of_assertion' mods act "run" [] None ^ "\n" | Assertion ass -> diff --git a/interpreter/host/main.ml b/interpreter/host/main.ml index 5e62cab159..ab09cbd223 100644 --- a/interpreter/host/main.ml +++ b/interpreter/host/main.ml @@ -2,8 +2,8 @@ let name = "wasm" let version = "0.7" let configure () = - Import.register "spectest" Spectest.lookup; - Import.register "env" Env.lookup + Import.register (Utf8.decode "spectest") Spectest.lookup; + Import.register (Utf8.decode "env") Env.lookup let banner () = print_endline diff --git a/interpreter/host/run.ml b/interpreter/host/run.ml index 3e041c9a5d..1f7a9c22c7 100644 --- a/interpreter/host/run.ml +++ b/interpreter/host/run.ml @@ -188,6 +188,8 @@ let input_stdin run = (* Printing *) +let string_of_name n = String.escaped (Utf8.encode n) + let print_import m im = let open Types in let category, annotation = @@ -198,7 +200,8 @@ let print_import m im = | ExternalGlobalType t -> "global", string_of_global_type t in Printf.printf " import %s %S %S : %s\n" - category im.it.Ast.module_name im.it.Ast.item_name annotation + category (string_of_name im.it.Ast.module_name) + (string_of_name im.it.Ast.item_name) annotation let print_export m ex = let open Types in @@ -210,7 +213,7 @@ let print_export m ex = | ExternalGlobalType t -> "global", string_of_global_type t in Printf.printf " export %s %S : %s\n" - category (String.escaped ex.it.Ast.name) annotation + category (string_of_name ex.it.Ast.name) annotation let print_module x_opt m = Printf.printf "module%s :\n" @@ -272,7 +275,7 @@ let run_definition def = let run_action act = match act.it with | Invoke (x_opt, name, vs) -> - trace ("Invoking function \"" ^ name ^ "\"..."); + trace ("Invoking function \"" ^ string_of_name name ^ "\"..."); let inst = lookup_instance x_opt act.at in (match Instance.export inst name with | Some (Instance.ExternalFunc f) -> @@ -282,7 +285,7 @@ let run_action act = ) | Get (x_opt, name) -> - trace ("Getting global \"" ^ name ^ "\"..."); + trace ("Getting global \"" ^ string_of_name name ^ "\"..."); let inst = lookup_instance x_opt act.at in (match Instance.export inst name with | Some (Instance.ExternalGlobal v) -> [v] @@ -298,7 +301,10 @@ let assert_result at correct got print_expect expect = end let assert_message at name msg re = - if String.sub msg 0 (String.length re) <> re then begin + if + String.length msg < String.length re || + String.sub msg 0 (String.length re) <> re + then begin print_endline ("Result: \"" ^ msg ^ "\""); print_endline ("Expect: \"" ^ re ^ "\""); Assert.error at ("wrong " ^ name ^ " error") @@ -419,10 +425,10 @@ let rec run_command cmd = | Register (name, x_opt) -> quote := cmd :: !quote; if not !Flags.dry then begin - trace ("Registering module \"" ^ name ^ "\"..."); + trace ("Registering module \"" ^ string_of_name name ^ "\"..."); let inst = lookup_instance x_opt cmd.at in - registry := Map.add name inst !registry; - Import.register name (lookup_registry name) + registry := Map.add (Utf8.encode name) inst !registry; + Import.register name (lookup_registry (Utf8.encode name)) end | Action act -> diff --git a/interpreter/spec/ast.ml b/interpreter/spec/ast.ml index b0c48ee3a1..0eac63fd3f 100644 --- a/interpreter/spec/ast.ml +++ b/interpreter/spec/ast.ml @@ -65,6 +65,7 @@ type storeop = Memory.mem_size memop type var = int32 Source.phrase type literal = Values.value Source.phrase +type name = int list type instr = instr' Source.phrase and instr' = @@ -152,7 +153,7 @@ and export_kind' = FuncExport | TableExport | MemoryExport | GlobalExport type export = export' Source.phrase and export' = { - name : string; + name : name; ekind : export_kind; item : var; } @@ -167,8 +168,8 @@ and import_kind' = type import = import' Source.phrase and import' = { - module_name : string; - item_name : string; + module_name : name; + item_name : name; ikind : import_kind; } diff --git a/interpreter/spec/decode.ml b/interpreter/spec/decode.ml index 6e5b7f12ad..871ae2a7ff 100644 --- a/interpreter/spec/decode.ml +++ b/interpreter/spec/decode.ml @@ -114,6 +114,10 @@ let rec list f n s = if n = 0 then [] else let x = f s in x :: list f (n - 1) s let opt f b s = if b then Some (f s) else None let vec f s = let n = len32 s in list f n s +let name s = + let pos = pos s in + try Utf8.decode (string s) with Utf8.Utf8 -> error s pos "invalid UTF-8 encoding" + let sized f s = let size = len32 s in let start = pos s in @@ -491,8 +495,8 @@ let import_kind s = | _ -> error s (pos s - 1) "invalid import kind" let import s = - let module_name = string s in - let item_name = string s in + let module_name = name s in + let item_name = name s in let ikind = at import_kind s in {module_name; item_name; ikind} @@ -548,7 +552,7 @@ let export_kind s = | _ -> error s (pos s - 1) "invalid export kind" let export s = - let name = string s in + let name = name s in let ekind = at export_kind s in let item = at var s in {name; ekind; item} @@ -608,7 +612,7 @@ let data_section s = let custom size s = let start = pos s in - let _id = string s in + let _id = name s in skip (size - (pos s - start)) s; true diff --git a/interpreter/spec/encode.ml b/interpreter/spec/encode.ml index e2f367580b..dc7a3c905d 100644 --- a/interpreter/spec/encode.ml +++ b/interpreter/spec/encode.ml @@ -71,6 +71,7 @@ let encode m = let bool b = vu1 (if b then 1 else 0) let string bs = len (String.length bs); put_string s bs + let name n = string (Utf8.encode n) let list f xs = List.iter f xs let opt f xo = Lib.Option.app f xo let vec f xs = len (List.length xs); list f xs @@ -390,7 +391,7 @@ let encode m = let import im = let {module_name; item_name; ikind} = im.it in - string module_name; string item_name; import_kind ikind + name module_name; name item_name; import_kind ikind let import_section ims = section 2 (vec import) ims (ims <> []) @@ -434,8 +435,8 @@ let encode m = | GlobalExport -> u8 3 let export ex = - let {name; ekind; item} = ex.it in - string name; export_kind ekind; var item + let {name = n; ekind; item} = ex.it in + name n; export_kind ekind; var item let export_section exs = section 7 (vec export) exs (exs <> []) diff --git a/interpreter/spec/instance.ml b/interpreter/spec/instance.ml index 8c468bc9d7..bcc0195303 100644 --- a/interpreter/spec/instance.ml +++ b/interpreter/spec/instance.ml @@ -1,6 +1,6 @@ open Values -module ExportMap = Map.Make(String) +module ExportMap = Map.Make(struct type t = Ast.name let compare = compare end) type global = value ref diff --git a/interpreter/spec/utf8.ml b/interpreter/spec/utf8.ml new file mode 100644 index 0000000000..a22cd75c48 --- /dev/null +++ b/interpreter/spec/utf8.ml @@ -0,0 +1,105 @@ +exception Utf8 + +let en n = 0x80 lor (n land 0x3f) + +let rec encode ns = Lib.String.implode (List.map Char.chr (encode' ns)) +and encode' = function + | [] -> [] + | n::ns when n < 0 -> + raise Utf8 + | n::ns when n < 0x80 -> + n :: encode' ns + | n::ns when n < 0x800 -> + 0xc0 lor (n lsr 6) :: en n :: encode' ns + | n::ns when n < 0x10000 -> + 0xe0 lor (n lsr 12) :: en (n lsr 6) :: en n :: encode' ns + | n::ns when n < 0x110000 -> + 0xf0 lor (n lsr 18) :: en (n lsr 12) :: en (n lsr 6) :: en n :: encode' ns + | _ -> + raise Utf8 + +let de b = if b land 0xc0 = 0x80 then b land 0x3f else raise Utf8 + +let rec decode s = decode' (List.map Char.code (Lib.String.explode s)) +and decode' = function + | [] -> [] + | b1::bs when b1 < 0x80 -> + b1 :: decode' bs + | b1::bs when b1 < 0xc0 -> + raise Utf8 + | b1::b2::bs when b1 < 0xe0 -> + (b1 land 0x1f) lsl 6 + de b2 :: decode' bs + | b1::b2::b3::bs when b1 < 0xf0 -> + (b1 land 0x0f) lsl 12 + de b2 lsl 6 + de b3 :: decode' bs + | b1::b2::b3::b4::bs when b1 < 0xf8 -> + (b1 land 0x07) lsl 18 + de b2 lsl 12 + de b3 lsl 6 + de b4 :: decode' bs + | _ -> + raise Utf8 + + +(* +let encode a = + let buf = Buffer.create (4 * Array.length a) in + let put n = Buffer.add_char (Char.chr n) in + for i = 0 to Array.length a - 1 do + let n = a.(i) in + if n < 0 then + raise Utf8 + else if n < 0x80 then + put n + else if n < 0x800 then begin + put (0xc0 lor (n lsr 6)); + put (0x80 lor (n land 0x3f)) + end else if cn< 0x10000 then begin + put (0xe0 lor (n lsr 12)); + put (0x80 lor ((n lsr 6) land 0x3f)); + put (0x80 lor (n land 0x3f)) + end else if n < 0x110000 then begin + put (0xf0 lor (n lsr 18)); + put (0x80 lor (n lsr 12)); + put (0x80 lor ((n lsr 6) land 0x3f)); + put (0x80 lor (n land 0x3f)) + end else + raise Utf8 + done; + Buffer.contents buf + +let decode s = + let a = Array.create (String.length s) 0 in + let i = ref 0 in + let j = ref 0 in + let get () = i := !i + 1; Char.code s.[!i - 1] in + let get_cont () = + if !i = String.length s then raise Utf8; + let b = get () in + if b land 0xc0 = 0x80 then b land 0x3f else raise Utf8 + in + while !i < String.length s do + let b = get () in + let n = + if b < 0x80 then + b + else if b < 0xc0 then + raise Utf8 + else if b < 0xe0 then + let b2 = get_cont () in + ((b land 0x1f) lsl 6) + b2 + else if b < 0xf0 then + let b2 = get_cont () in + let b3 = get_cont () in + ((b land 0x0f) lsl 12) + (b2 lsl 6) + b3 + else if b < 0xf8 then + let b2 = get_cont () in + let b3 = get_cont () in + let b4 = get_cont () in + ((b land 0x07) lsl 18) + (b2 lsl 12) + (b3 lsl 6) + b4 + else + raise Utf8 + in + a.(!j) <- n; + j := !j + 1 + done; + let a' = Array.create !j 0 in + Array.blit a 0 a' 0 !j; + a' +*) diff --git a/interpreter/spec/utf8.mli b/interpreter/spec/utf8.mli new file mode 100644 index 0000000000..589d87ec51 --- /dev/null +++ b/interpreter/spec/utf8.mli @@ -0,0 +1,4 @@ +exception Utf8 + +val decode : string -> int list (* raise UTf8 *) +val encode : int list -> string (* raise Utf8 *) diff --git a/interpreter/spec/valid.ml b/interpreter/spec/valid.ml index 24f6b0a4ce..63b5e3ac0e 100644 --- a/interpreter/spec/valid.ml +++ b/interpreter/spec/valid.ml @@ -393,7 +393,7 @@ let check_import (im : import) (c : context) : context = "mutable globals cannot be imported (yet)"; {c with globals = t :: c.globals} -module NameSet = Set.Make(String) +module NameSet = Set.Make(struct type t = Ast.name let compare = compare end) let check_export (c : context) (set : NameSet.t) (ex : export) : NameSet.t = let {name; ekind; item} = ex.it in diff --git a/interpreter/text/arrange.ml b/interpreter/text/arrange.ml index 7334804544..48412ef456 100644 --- a/interpreter/text/arrange.ml +++ b/interpreter/text/arrange.ml @@ -29,6 +29,7 @@ let string_with add_char s = let bytes = string_with add_hex_char let string = string_with add_char +let name n = string (Utf8.encode n) let list_of_opt = function None -> [] | Some x -> [x] @@ -307,7 +308,7 @@ let import_kind i k = let import i im = let {module_name; item_name; ikind} = im.it in Node ("import", - [atom string module_name; atom string item_name; import_kind i ikind] + [atom name module_name; atom name item_name; import_kind i ikind] ) let export_kind k = @@ -318,9 +319,9 @@ let export_kind k = | GlobalExport -> "global" let export ex = - let {name; ekind; item} = ex.it in + let {name = n; ekind; item} = ex.it in Node ("export", - [atom string name; Node (export_kind ekind, [atom var item])] + [atom name n; Node (export_kind ekind, [atom var item])] ) let global off i g = @@ -394,8 +395,8 @@ let definition mode x_opt def = | Encoded (_, bs) -> bs in binary_module_with_var_opt x_opt bs -let access x_opt name = - String.concat " " [var_opt x_opt; string name] +let access x_opt n = + String.concat " " [var_opt x_opt; name n] let action act = match act.it with @@ -428,8 +429,8 @@ let assertion mode ass = let command mode cmd = match cmd.it with | Module (x_opt, def) -> definition mode x_opt def - | Register (name, x_opt) -> - Node ("register " ^ string name ^ var_opt x_opt, []) + | Register (n, x_opt) -> + Node ("register " ^ name n ^ var_opt x_opt, []) | Action act -> action act | Assertion ass -> assertion mode ass | Meta _ -> assert false diff --git a/interpreter/text/parser.mly b/interpreter/text/parser.mly index def0b811b8..cbeb803808 100644 --- a/interpreter/text/parser.mly +++ b/interpreter/text/parser.mly @@ -48,6 +48,9 @@ let nat s at = let nat32 s at = try I32.of_string_u s with Failure _ -> error at "i32 constant out of range" +let name s at = + try Utf8.decode s with Utf8.Utf8 -> error at "invalid UTF-8 encoding" + (* Symbolic variables *) @@ -190,6 +193,9 @@ let inline_type (c : context) ty at = /* Auxiliaries */ +name : + | TEXT { name $1 (at ()) } + text_list : | /* empty */ { "" } | text_list TEXT { $1 ^ $2 } @@ -527,7 +533,7 @@ import_kind : { fun c -> $3 c anon_global bind_global; GlobalImport $4 } import : - | LPAR IMPORT TEXT TEXT import_kind RPAR + | LPAR IMPORT name name import_kind RPAR { let at = at () and at5 = ati 5 in fun c -> {module_name = $3; item_name = $4; ikind = $5 c @@ at5} @@ at } | LPAR FUNC bind_var_opt inline_import type_use RPAR /* Sugar */ @@ -552,7 +558,7 @@ import : {module_name = fst $4; item_name = snd $4; ikind = GlobalImport $5 @@ at} @@ at } inline_import : - | LPAR IMPORT TEXT TEXT RPAR { $3, $4 } + | LPAR IMPORT name name RPAR { $3, $4 } export_kind : | LPAR FUNC var RPAR { fun c -> FuncExport, $3 c func } @@ -561,7 +567,7 @@ export_kind : | LPAR GLOBAL var RPAR { fun c -> GlobalExport, $3 c global } export : - | LPAR EXPORT TEXT export_kind RPAR + | LPAR EXPORT name export_kind RPAR { let at = at () and at4 = ati 4 in fun c -> let k, x = $4 c in {name = $3; ekind = k @@ at4; item = x} @@ at } @@ -571,7 +577,7 @@ inline_export_opt : | inline_export { $1 } inline_export : - | LPAR EXPORT TEXT RPAR + | LPAR EXPORT name RPAR { let at = at () in fun k count c -> [{name = $3; ekind = k @@ at; item = Int32.sub count 1l @@ at} @@ at] } @@ -646,9 +652,9 @@ script_var_opt : | VAR { Some ($1 @@ at ()) } /* Sugar */ action : - | LPAR INVOKE script_var_opt TEXT const_list RPAR + | LPAR INVOKE script_var_opt name const_list RPAR { Invoke ($3, $4, $5) @@ at () } - | LPAR GET script_var_opt TEXT RPAR + | LPAR GET script_var_opt name RPAR { Get ($3, $4) @@ at() } assertion : @@ -670,7 +676,7 @@ cmd : | action { Action $1 @@ at () } | assertion { Assertion $1 @@ at () } | module_ { Module (fst $1, snd $1) @@ at () } - | LPAR REGISTER TEXT script_var_opt RPAR { Register ($3, $4) @@ at () } + | LPAR REGISTER name script_var_opt RPAR { Register ($3, $4) @@ at () } | meta { Meta $1 @@ at () } cmd_list : diff --git a/interpreter/text/script.ml b/interpreter/text/script.ml index faa87e76fd..11032f1f6a 100644 --- a/interpreter/text/script.ml +++ b/interpreter/text/script.ml @@ -7,8 +7,8 @@ and definition' = type action = action' Source.phrase and action' = - | Invoke of var option * string * Ast.literal list - | Get of var option * string + | Invoke of var option * Ast.name * Ast.literal list + | Get of var option * Ast.name type assertion = assertion' Source.phrase and assertion' = @@ -25,7 +25,7 @@ and assertion' = type command = command' Source.phrase and command' = | Module of var option * definition - | Register of string * var option + | Register of Ast.name * var option | Action of action | Assertion of assertion | Meta of meta diff --git a/interpreter/util/lib.ml b/interpreter/util/lib.ml index 284329ecde..5140774952 100644 --- a/interpreter/util/lib.ml +++ b/interpreter/util/lib.ml @@ -4,6 +4,37 @@ struct if n = 0 then () else (f x; repeat (n - 1) f x) end +module Int = +struct + let log2 n = + if n <= 0 then failwith "log2"; + let rec loop acc n = if n = 1 then acc else loop (acc + 1) (n lsr 1) in + loop 0 n + + let is_power_of_two n = + if n < 0 then failwith "is_power_of_two"; + n <> 0 && n land (n - 1) = 0 +end + +module String = +struct + let implode cs = + let buf = Buffer.create (List.length cs) in + List.iter (Buffer.add_char buf) cs; + Buffer.contents buf + + let explode s = + let cs = ref [] in + for i = String.length s - 1 downto 0 do cs := s.[i] :: !cs done; + !cs + + let breakup s n = + let rec loop i = + let len = min n (String.length s - i) in + if len = 0 then [] else String.sub s i len :: loop (i + len) + in loop 0 +end + module List = struct let rec make n x = @@ -128,24 +159,3 @@ struct | Some x -> f x | None -> () end - -module Int = -struct - let log2 n = - if n <= 0 then failwith "log2"; - let rec loop acc n = if n = 1 then acc else loop (acc + 1) (n lsr 1) in - loop 0 n - - let is_power_of_two n = - if n < 0 then failwith "is_power_of_two"; - n <> 0 && n land (n - 1) = 0 -end - -module String = -struct - let breakup s n = - let rec loop i = - let len = min n (String.length s - i) in - if len = 0 then [] else String.sub s i len :: loop (i + len) - in loop 0 -end diff --git a/interpreter/util/lib.mli b/interpreter/util/lib.mli index 293070c036..5704e067c4 100644 --- a/interpreter/util/lib.mli +++ b/interpreter/util/lib.mli @@ -64,5 +64,7 @@ end module String : sig + val implode : char list -> string + val explode : string -> char list val breakup : string -> int -> string list end diff --git a/interpreter/winmake.bat b/interpreter/winmake.bat index e86d11985b..40489fd452 100644 --- a/interpreter/winmake.bat +++ b/interpreter/winmake.bat @@ -17,6 +17,7 @@ ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -bin-annot -I spec -I util -I h ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -bin-annot -I spec -I util -I host -I text -I host/import -o spec/table.cmi spec/table.mli ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -bin-annot -I spec -I util -I host -I text -I host/import -o spec/instance.cmo spec/instance.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -bin-annot -I spec -I util -I host -I text -I host/import -o spec/eval.cmi spec/eval.mli +ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -bin-annot -I spec -I util -I host -I text -I host/import -o spec/utf8.cmi spec/utf8.mli ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -bin-annot -I spec -I util -I host -I text -I host/import -o spec/encode.cmi spec/encode.mli ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -bin-annot -I host/import -I util -I spec -I host -I text -o host/import/env.cmo host/import/env.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -bin-annot -I host -I util -I spec -I text -I host/import -o host/flags.cmo host/flags.ml @@ -38,16 +39,17 @@ ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I host -I util - ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I host -I util -I spec -I text -I host/import -o host/import.d.cmo host/import.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I host -I util -I spec -I text -I host/import -o host/run.d.cmo host/run.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I host/import -I util -I spec -I host -I text -o host/import/spectest.d.cmo host/import/spectest.ml +ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/utf8.d.cmo spec/utf8.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/ast.d.cmo spec/ast.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I util -I spec -I host -I text -I host/import -o util/error.d.cmo util/error.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/f32.d.cmo spec/f32.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/f64.d.cmo spec/f64.ml -ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/i32.d.cmo spec/i32.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I util -I spec -I host -I text -I host/import -o util/lib.d.cmo util/lib.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/memory.d.cmo spec/memory.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I util -I spec -I host -I text -I host/import -o util/source.d.cmo util/source.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/types.d.cmo spec/types.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/values.d.cmo spec/values.ml +ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/i32.d.cmo spec/i32.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/i64.d.cmo spec/i64.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/float.d.cmo spec/float.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I spec -I util -I host -I text -I host/import -o spec/int.d.cmo spec/int.ml @@ -83,4 +85,4 @@ ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I text -I util - ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I text -I util -I spec -I host -I host/import -o text/parser.d.cmo text/parser.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I text -I util -I spec -I host -I host/import -o text/arrange.d.cmo text/arrange.ml ocamlc.opt -c -w +a-4-27-42-44-45 -warn-error +a -g -bin-annot -I util -I spec -I host -I text -I host/import -o util/sexpr.d.cmo util/sexpr.ml -ocamlc.opt str.cma bigarray.cma -g host/flags.d.cmo spec/float.d.cmo spec/f32.d.cmo spec/f64.d.cmo spec/numeric_error.d.cmo spec/int.d.cmo spec/i32.d.cmo spec/i64.d.cmo spec/types.d.cmo spec/values.d.cmo util/lib.d.cmo spec/memory.d.cmo util/source.d.cmo spec/ast.d.cmo spec/table.d.cmo spec/instance.d.cmo util/error.d.cmo host/import.d.cmo spec/i32_convert.d.cmo spec/f32_convert.d.cmo spec/i64_convert.d.cmo spec/f64_convert.d.cmo spec/eval_numeric.d.cmo spec/eval.d.cmo host/import/env.d.cmo host/import/spectest.d.cmo spec/encode.d.cmo spec/operators.d.cmo spec/decode.d.cmo text/script.d.cmo host/js.d.cmo spec/valid.d.cmo text/parser.d.cmo text/lexer.d.cmo text/parse.d.cmo util/sexpr.d.cmo text/arrange.d.cmo text/print.d.cmo host/run.d.cmo host/main.d.cmo -o %NAME%.exe +ocamlc.opt bigarray.cma -g host/flags.d.cmo spec/float.d.cmo spec/f32.d.cmo spec/f64.d.cmo spec/numeric_error.d.cmo spec/int.d.cmo spec/i32.d.cmo spec/i64.d.cmo spec/types.d.cmo spec/values.d.cmo util/lib.d.cmo spec/memory.d.cmo util/source.d.cmo spec/ast.d.cmo spec/table.d.cmo spec/instance.d.cmo spec/utf8.d.cmo util/error.d.cmo host/import.d.cmo spec/i32_convert.d.cmo spec/f32_convert.d.cmo spec/i64_convert.d.cmo spec/f64_convert.d.cmo spec/eval_numeric.d.cmo spec/eval.d.cmo host/import/env.d.cmo host/import/spectest.d.cmo spec/encode.d.cmo spec/operators.d.cmo spec/decode.d.cmo text/script.d.cmo host/js.d.cmo spec/valid.d.cmo text/parser.d.cmo text/lexer.d.cmo text/parse.d.cmo util/sexpr.d.cmo text/arrange.d.cmo text/print.d.cmo host/run.d.cmo host/main.d.cmo -o %NAME%.exe From a222d1726ede3e6e33420ec548a973254792a7ea Mon Sep 17 00:00:00 2001 From: Andreas Rossberg Date: Wed, 5 Apr 2017 09:32:10 +0200 Subject: [PATCH 2/3] Rule out overlong and surrogates --- interpreter/spec/utf8.ml | 91 +++++++--------------------------------- 1 file changed, 14 insertions(+), 77 deletions(-) diff --git a/interpreter/spec/utf8.ml b/interpreter/spec/utf8.ml index a22cd75c48..5cf17997e5 100644 --- a/interpreter/spec/utf8.ml +++ b/interpreter/spec/utf8.ml @@ -1,6 +1,6 @@ exception Utf8 -let en n = 0x80 lor (n land 0x3f) +let con n = 0x80 lor (n land 0x3f) let rec encode ns = Lib.String.implode (List.map Char.chr (encode' ns)) and encode' = function @@ -10,96 +10,33 @@ and encode' = function | n::ns when n < 0x80 -> n :: encode' ns | n::ns when n < 0x800 -> - 0xc0 lor (n lsr 6) :: en n :: encode' ns + 0xc0 lor (n lsr 6) :: con n :: encode' ns | n::ns when n < 0x10000 -> - 0xe0 lor (n lsr 12) :: en (n lsr 6) :: en n :: encode' ns + 0xe0 lor (n lsr 12) :: con (n lsr 6) :: con n :: encode' ns | n::ns when n < 0x110000 -> - 0xf0 lor (n lsr 18) :: en (n lsr 12) :: en (n lsr 6) :: en n :: encode' ns + 0xf0 lor (n lsr 18) :: con (n lsr 12) :: con (n lsr 6) :: con n + :: encode' ns | _ -> raise Utf8 -let de b = if b land 0xc0 = 0x80 then b land 0x3f else raise Utf8 +let con b = if b land 0xc0 = 0x80 then b land 0x3f else raise Utf8 +let code min n = + if n < min || (0xd800 <= n && n < 0xe000) || n >= 0x110000 then raise Utf8 + else n let rec decode s = decode' (List.map Char.code (Lib.String.explode s)) and decode' = function | [] -> [] | b1::bs when b1 < 0x80 -> - b1 :: decode' bs + code 0x0 b1 :: decode' bs | b1::bs when b1 < 0xc0 -> raise Utf8 | b1::b2::bs when b1 < 0xe0 -> - (b1 land 0x1f) lsl 6 + de b2 :: decode' bs + code 0x80 ((b1 land 0x1f) lsl 6 + con b2) :: decode' bs | b1::b2::b3::bs when b1 < 0xf0 -> - (b1 land 0x0f) lsl 12 + de b2 lsl 6 + de b3 :: decode' bs + code 0x800 ((b1 land 0x0f) lsl 12 + con b2 lsl 6 + con b3) :: decode' bs | b1::b2::b3::b4::bs when b1 < 0xf8 -> - (b1 land 0x07) lsl 18 + de b2 lsl 12 + de b3 lsl 6 + de b4 :: decode' bs + code 0x10000 ((b1 land 0x07) lsl 18 + con b2 lsl 12 + con b3 lsl 6 + con b4) + :: decode' bs | _ -> raise Utf8 - - -(* -let encode a = - let buf = Buffer.create (4 * Array.length a) in - let put n = Buffer.add_char (Char.chr n) in - for i = 0 to Array.length a - 1 do - let n = a.(i) in - if n < 0 then - raise Utf8 - else if n < 0x80 then - put n - else if n < 0x800 then begin - put (0xc0 lor (n lsr 6)); - put (0x80 lor (n land 0x3f)) - end else if cn< 0x10000 then begin - put (0xe0 lor (n lsr 12)); - put (0x80 lor ((n lsr 6) land 0x3f)); - put (0x80 lor (n land 0x3f)) - end else if n < 0x110000 then begin - put (0xf0 lor (n lsr 18)); - put (0x80 lor (n lsr 12)); - put (0x80 lor ((n lsr 6) land 0x3f)); - put (0x80 lor (n land 0x3f)) - end else - raise Utf8 - done; - Buffer.contents buf - -let decode s = - let a = Array.create (String.length s) 0 in - let i = ref 0 in - let j = ref 0 in - let get () = i := !i + 1; Char.code s.[!i - 1] in - let get_cont () = - if !i = String.length s then raise Utf8; - let b = get () in - if b land 0xc0 = 0x80 then b land 0x3f else raise Utf8 - in - while !i < String.length s do - let b = get () in - let n = - if b < 0x80 then - b - else if b < 0xc0 then - raise Utf8 - else if b < 0xe0 then - let b2 = get_cont () in - ((b land 0x1f) lsl 6) + b2 - else if b < 0xf0 then - let b2 = get_cont () in - let b3 = get_cont () in - ((b land 0x0f) lsl 12) + (b2 lsl 6) + b3 - else if b < 0xf8 then - let b2 = get_cont () in - let b3 = get_cont () in - let b4 = get_cont () in - ((b land 0x07) lsl 18) + (b2 lsl 12) + (b3 lsl 6) + b4 - else - raise Utf8 - in - a.(!j) <- n; - j := !j + 1 - done; - let a' = Array.create !j 0 in - Array.blit a 0 a' 0 !j; - a' -*) From bfd34c659e52520f0c5b99a66cf8507aff252697 Mon Sep 17 00:00:00 2001 From: Andreas Rossberg Date: Wed, 5 Apr 2017 14:25:15 +0200 Subject: [PATCH 3/3] Line break --- interpreter/spec/decode.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/interpreter/spec/decode.ml b/interpreter/spec/decode.ml index 871ae2a7ff..5ddce7d10a 100644 --- a/interpreter/spec/decode.ml +++ b/interpreter/spec/decode.ml @@ -116,7 +116,8 @@ let vec f s = let n = len32 s in list f n s let name s = let pos = pos s in - try Utf8.decode (string s) with Utf8.Utf8 -> error s pos "invalid UTF-8 encoding" + try Utf8.decode (string s) with Utf8.Utf8 -> + error s pos "invalid UTF-8 encoding" let sized f s = let size = len32 s in