|
| 1 | +(** |
| 2 | + * Delta Tree |
| 3 | + * |
| 4 | + * @author Nathaniel Gray |
| 5 | + * @version 0.1 |
| 6 | + * |
| 7 | + * This is a proof-of-concept of a data structure that can be used, for example, |
| 8 | + * to keep track of marks (or newlines) in a text file while supporting quick |
| 9 | + * insertion and deletion of text. The main "innovation" comes from storing |
| 10 | + * the elements as offsets rather than absolute values. I think that this |
| 11 | + * idea can be applied to almost any type of tree, but I'm using 2-3 trees |
| 12 | + * because I find them easy to think about. |
| 13 | + * |
| 14 | + * The idea is that as you dig down into the tree you keep a sum of the |
| 15 | + * deltas that you pass on the right. So this is one possible tree for |
| 16 | + * the values [50, 75, 100, 130, 150, 200, 295, 297]: |
| 17 | + * {v |
| 18 | + [ 100 100 ] |
| 19 | + ___/ | \___ |
| 20 | + / | \ |
| 21 | + [50 25] [30 20] [95 2] |
| 22 | + v} |
| 23 | + * To retrieve the values, perform an inorder walk of the tree, adding the |
| 24 | + * values in each node. This representation is useful because if you want to |
| 25 | + * insert 35 characters at position 110 you only need to update two entries |
| 26 | + * (marked with * ): |
| 27 | + * {v |
| 28 | + [ 100 *135 ] |
| 29 | + ___/ | \___ |
| 30 | + / | \ |
| 31 | + [50 25] [*65 20] [95 2] |
| 32 | + v} |
| 33 | + * Text insertion costs h (the height of the tree) additions in the worst |
| 34 | + * case. Adding or deleting marks has the usual log n cost for element |
| 35 | + * insertion/deletion. |
| 36 | + * |
| 37 | + *) |
| 38 | + |
| 39 | +open Printf |
| 40 | + |
| 41 | +let debug = true |
| 42 | + |
| 43 | +module type DeltaTree = |
| 44 | +sig |
| 45 | + type 'a t |
| 46 | + |
| 47 | + val empty : 'a t |
| 48 | + val add : 'a t -> int -> 'a -> 'a t |
| 49 | + val find : 'a t -> int -> int * 'a |
| 50 | + val print_tree : ('a -> string) -> 'a t -> unit |
| 51 | + (* val remove : 'a t -> int -> 'a t *) |
| 52 | +end |
| 53 | + |
| 54 | +module DeltaTree : DeltaTree = |
| 55 | +struct |
| 56 | + type 'elt dtree = |
| 57 | + Node of 'elt dtree * (int * 'elt) * 'elt dtree * (int * 'elt) option |
| 58 | + * 'elt dtree |
| 59 | + | Leaf |
| 60 | + |
| 61 | + type 'a t = 'a dtree |
| 62 | + |
| 63 | + let empty = Leaf |
| 64 | + |
| 65 | + type direction = Left | Center | Right |
| 66 | + |
| 67 | + let rec print_tree printer indent prefix tree = |
| 68 | + let print_tree = print_tree printer in |
| 69 | + match tree with |
| 70 | + Leaf -> () (* printf "%s%sLeaf\n" indent prefix *) |
| 71 | + | Node (tl, (pl, el), tc, er_opt, tr) -> |
| 72 | + begin match er_opt with |
| 73 | + Some (pr, er) -> |
| 74 | + print_tree (indent ^ " ") "/-" tr; |
| 75 | + printf "%s%s[%i, %s]\n" indent prefix pr (printer er); |
| 76 | + print_tree (indent ^ " ") ">-" tc |
| 77 | + | None -> |
| 78 | + print_tree (indent ^ " ") "/-" tc |
| 79 | + end; |
| 80 | + printf "%s%s[%i, %s]\n" indent prefix pl (printer el); |
| 81 | + print_tree (indent ^ " ") "\\-" tl |
| 82 | + |
| 83 | + let print_tree printer tree = |
| 84 | + print_tree printer "" "" tree |
| 85 | + |
| 86 | + type 'elt pick_subtree_result = |
| 87 | + MatchElt of direction * (int * 'elt) |
| 88 | + | Subtree of direction * (int * 'elt) option * int * 'elt dtree |
| 89 | + |
| 90 | + (* A utility function *) |
| 91 | + let dtree_pick_subtree d (t1, e1, t2, e2opt, t3) pos = |
| 92 | + (* if debug then |
| 93 | + printf "pick_subtree: %i, %i\n" d pos; *) |
| 94 | + let pos1 = d + fst e1 in |
| 95 | + if pos = pos1 then |
| 96 | + MatchElt (Left, e1) |
| 97 | + else if pos < pos1 then |
| 98 | + Subtree (Left, None, d, t1) |
| 99 | + else |
| 100 | + let d = pos1 in |
| 101 | + match e2opt with |
| 102 | + None -> |
| 103 | + Subtree (Center, Some e1, d, t2) |
| 104 | + | Some e2 -> |
| 105 | + (* if debug then |
| 106 | + printf "pick_subtree2: %i, %i\n" d pos; *) |
| 107 | + let pos2 = d + fst e2 in |
| 108 | + if pos = pos2 then |
| 109 | + MatchElt (Right, e2) |
| 110 | + else if pos < pos2 then |
| 111 | + Subtree (Center, Some e1, d, t2) |
| 112 | + else |
| 113 | + Subtree (Right, Some e2, pos2, t3) |
| 114 | + |
| 115 | + |
| 116 | + let rec dtree_find_pos d best_elt dtree pos : (int * 'elt) option = |
| 117 | + match dtree with |
| 118 | + Node (t1, e1, t2, e2opt, t3) -> |
| 119 | + begin |
| 120 | + match dtree_pick_subtree d (t1, e1, t2, e2opt, t3) pos with |
| 121 | + MatchElt (_, e) -> |
| 122 | + Some e |
| 123 | + | Subtree (_, lparent, d, t) -> |
| 124 | + begin |
| 125 | + let best_elt = match lparent with |
| 126 | + None -> best_elt |
| 127 | + | e -> e |
| 128 | + in |
| 129 | + dtree_find_pos d best_elt t pos |
| 130 | + end |
| 131 | + end |
| 132 | + | Leaf -> |
| 133 | + best_elt |
| 134 | + |
| 135 | + (* |
| 136 | + * Find the element before or at position pos. |
| 137 | + * If the position is before any element, returns None. Otherwise |
| 138 | + * returns Some (elt_pos, elt). |
| 139 | + *) |
| 140 | + let find dtree pos : (int * 'elt) option = |
| 141 | + dtree_find_pos 0 None dtree pos |
| 142 | + |
| 143 | + let find dtree pos = |
| 144 | + match find dtree pos with |
| 145 | + Some x -> x |
| 146 | + | None -> raise Not_found |
| 147 | + |
| 148 | + type 'elt insert_result = |
| 149 | + Promote of 'elt dtree * (int * 'elt) * 'elt dtree |
| 150 | + | Done of 'elt dtree |
| 151 | + |
| 152 | + (* Have to be careful to keep the deltas in shape while unwinding *) |
| 153 | + let unwind_insert d (t1, e1, t2, e2opt, t3) dir result = |
| 154 | + match result, e2opt, dir with |
| 155 | + (* These are the easy Done cases *) |
| 156 | + Done node, _, Left -> |
| 157 | + Done (Node (node, e1, t2, e2opt, t3)) |
| 158 | + | Done node, _, Center -> |
| 159 | + Done (Node (t1, e1, node, e2opt, t3)) |
| 160 | + | Done node, _, Right -> |
| 161 | + Done (Node (t1, e1, t2, e2opt, node)) |
| 162 | + |
| 163 | + (* These are the easy merge cases *) |
| 164 | + | Promote (tl, e, tr), None, Left -> |
| 165 | + let p = fst e in |
| 166 | + let p1 = fst e1 in |
| 167 | + let e1 = (p1 - p, snd e1) in |
| 168 | + Done (Node (tl, e, tr, Some e1, t2)) |
| 169 | + | Promote (tl, e, tr), None, Center -> |
| 170 | + Done (Node (t1, e1, tl, Some e, tr)) |
| 171 | + |
| 172 | + (* These are the merge cases that require promoting an element *) |
| 173 | + | Promote (tl, e, tr), Some e2, Left -> |
| 174 | + let left_tree = Node (tl, e, tr, None, Leaf) in |
| 175 | + let right_tree = Node (t2, e2, t3, None, Leaf) in |
| 176 | + Promote (left_tree, e1, right_tree) |
| 177 | + | Promote (tl, e, tr), Some e2, Center -> |
| 178 | + let p = fst e in |
| 179 | + let p1 = fst e1 in |
| 180 | + let p2 = fst e2 in |
| 181 | + let e = (p1 + p, snd e) in |
| 182 | + let e2 = (p2 - p, snd e2) in |
| 183 | + let left_tree = Node (t1, e1, tl, None, Leaf) in |
| 184 | + let right_tree = Node (tr, e2, t3, None, Leaf) in |
| 185 | + Promote (left_tree, e, right_tree) |
| 186 | + | Promote (tl, e, tr), Some e2, Right -> |
| 187 | + let p1 = fst e1 in |
| 188 | + let p2 = fst e2 in |
| 189 | + let e2 = (p2 + p1, snd e2) in |
| 190 | + let left_tree = Node (t1, e1, t2, None, Leaf) in |
| 191 | + let right_tree = Node (tl, e, tr, None, Leaf) in |
| 192 | + Promote (left_tree, e2, right_tree) |
| 193 | + | _ -> |
| 194 | + failwith "Internal error: unwind_insert" |
| 195 | + |
| 196 | + |
| 197 | + let rec dtree_insert_elt d dtree pos elt = |
| 198 | + match dtree with |
| 199 | + Node (t1, e1, t2, e2opt, t3) -> |
| 200 | + begin |
| 201 | + match dtree_pick_subtree d (t1, e1, t2, e2opt, t3) pos with |
| 202 | + MatchElt (dir, e) -> |
| 203 | + (* There's already an element at this position |
| 204 | + For now just replace it. Later maybe I should think |
| 205 | + about this more *) |
| 206 | + (* if debug then |
| 207 | + printf "match: %i (%s -> %s)\n" pos (snd e) elt; *) |
| 208 | + if dir = Left then |
| 209 | + Done (Node (t1, (pos-d, elt), t2, e2opt, t3)) |
| 210 | + else |
| 211 | + Done (Node (t1, e1, t2, Some (pos-(fst e1), elt), t3)) |
| 212 | + | Subtree (dir, lparent, d', tree) -> |
| 213 | + let insert_result = dtree_insert_elt d' tree pos elt in |
| 214 | + unwind_insert d (t1, e1, t2, e2opt, t3) dir |
| 215 | + insert_result |
| 216 | + end |
| 217 | + | Leaf -> |
| 218 | + (* if debug then |
| 219 | + printf "promote: %i, %s\n" (pos-d) elt; *) |
| 220 | + Promote (Leaf, (pos-d, elt), Leaf) |
| 221 | + |
| 222 | + let add dtree pos elt = |
| 223 | + match dtree_insert_elt 0 dtree pos elt with |
| 224 | + Done node -> |
| 225 | + node |
| 226 | + | Promote (tl, e, tr) -> |
| 227 | + Node (tl, e, tr, None, Leaf) |
| 228 | + |
| 229 | + (* let remove dtree pos = *) |
| 230 | + |
| 231 | + |
| 232 | +end |
| 233 | + |
| 234 | + |
| 235 | + |
| 236 | +let _ = |
| 237 | + let values = [5; 15; 10; 20; 30; 25; 35; 40; 1; 95; 3928; 298; 22; 17; 5] in |
| 238 | + (* let values = List.rev values in *) |
| 239 | + let values = List.map (fun x -> x, string_of_int x ^ "s" ) values in |
| 240 | + let print_tree = DeltaTree.print_tree (fun x-> "\"" ^ x ^ "\"") in |
| 241 | + let dt = DeltaTree.empty in |
| 242 | + let dt = List.fold_left |
| 243 | + (fun dt (loc, str) -> |
| 244 | + print_tree dt; print_newline (); |
| 245 | + DeltaTree.add dt loc str) |
| 246 | + dt values |
| 247 | + in |
| 248 | + print_tree dt |
| 249 | + |
0 commit comments