mozilla · ncalexan · Feb 8, 2017 · Feb 8, 2017 · Jan 26, 2017 · Feb 8, 2017
diff --git a/Cargo.toml b/Cargo.toml
@@ -9,9 +9,10 @@ nickel = "0.9.0"
 slog = "1.4.0"
 slog-scope = "0.2.2"
 slog-term = "1.3.4"
+time = "0.1.35"
 
 [dependencies.rusqlite]
-version = "0.9.3"
+version = "0.9.5"
 # System sqlite might be very old.
 features = ["bundled"]
 

diff --git a/db/Cargo.toml b/db/Cargo.toml
@@ -4,10 +4,13 @@ version = "0.0.1"
 
 [dependencies]
 error-chain = "0.8.0"
+itertools = "0.5.9"
 lazy_static = "0.2.2"
+ordered-float = "0.4.0"
+time = "0.1.35"
 
 [dependencies.rusqlite]
-version = "0.9.3"
+version = "0.9.5"
 # System sqlite might be very old.
 features = ["bundled"]
 
@@ -22,3 +25,7 @@ path = "../tx"
 
 [dependencies.mentat_tx_parser]
 path = "../tx-parser"
+
+# Should be dev-dependencies.
+[dependencies.tabwriter]
+version = "1.0.3"
diff --git a/db/src/bootstrap.rs b/db/src/bootstrap.rs
@@ -10,7 +10,7 @@
 
 #![allow(dead_code)]
 
-use {to_namespaced_keyword};
+use ::{to_namespaced_keyword};
 use edn;
 use edn::types::Value;
 use entids;
@@ -20,6 +20,11 @@ use mentat_tx_parser;
 use types::{IdentMap, Partition, PartitionMap, Schema, TypedValue};
 use values;
 
+/// The first transaction ID applied to the knowledge base.
+///
+/// This is the start of the `:db.part/tx` partition: both the `V1_PARTS` and `V2_PARTS` bootstrap
+/// tables use this value for their `:db.part/tx` entry.
+pub const TX0: i64 = 0x10000000;
+
 lazy_static! {
     static ref V1_IDENTS: Vec<(&'static str, i64)> = {
         vec![(":db/ident",             entids::DB_IDENT),
@@ -70,14 +75,14 @@ lazy_static! {
     static ref V1_PARTS: Vec<(&'static str, i64, i64)> = {
         vec![(":db.part/db", 0, (1 + V1_IDENTS.len()) as i64),
              (":db.part/user", 0x10000, 0x10000),
-             (":db.part/tx", 0x10000000, 0x10000000),
+             (":db.part/tx", TX0, TX0),
         ]
     };
 
     static ref V2_PARTS: Vec<(&'static str, i64, i64)> = {
         vec![(":db.part/db", 0, (1 + V2_IDENTS.len()) as i64),
              (":db.part/user", 0x10000, 0x10000),
-             (":db.part/tx", 0x10000000, 0x10000000),
+             (":db.part/tx", TX0, TX0),
         ]
     };
 

diff --git a/db/src/db.rs b/db/src/db.rs
diff --git a/db/src/debug.rs b/db/src/debug.rs
@@ -12,55 +12,203 @@
 
 /// Low-level functions for testing.
 
+use std::collections::{BTreeSet};
+use std::io::{Write};
+
+use itertools::Itertools;
 use rusqlite;
+use rusqlite::types::{ToSql};
+use tabwriter::TabWriter;
 
-use {to_namespaced_keyword};
-use edn::types::{Value};
+use ::{to_namespaced_keyword};
+use bootstrap;
+use edn;
+use edn::symbols;
+use entids;
 use mentat_tx::entities::{Entid};
 use types::{DB, TypedValue};
 use errors::Result;
 
-/// Represents an assertion (*datom*) in the store.
+/// Represents a *datom* (assertion) in the store.
+///
+/// The derived `Ord` compares fields in declaration order, so datoms sort by
+/// (e, a, v, tx, added).
 #[derive(Clone,Debug,Eq,Hash,Ord,PartialOrd,PartialEq)]
 pub struct Datom {
     // TODO: generalize this.
+    /// Entity, as produced by `to_entid`: an ident when the schema knows one for the numeric
+    /// entid, otherwise the numeric entid itself.
     e: Entid,
+    /// Attribute, converted the same way as `e`.
     a: Entid,
-    v: Value,
-    tx: Option<i64>,
+    /// Value, converted to its EDN representation.
+    v: edn::Value,
+    /// ID of the transaction, read from the row's `tx` column.
+    tx: i64,
+    /// `None` for datoms read from the `datoms` table (see `datoms_after`); `Some(flag)` for rows
+    /// read from the `transactions` table (see `transactions_after`).
+    /// NOTE(review): presumably `true` means assertion and `false` means retraction -- confirm.
+    added: Option<bool>,
+}
+
+/// Represents a set of datoms (assertions) in the store.
+///
+/// Backed by a `BTreeSet`, so iteration follows `Datom`'s derived ordering and equal datoms are
+/// stored only once.
+pub struct Datoms(pub BTreeSet<Datom>);
+
+/// Represents an ordered sequence of transactions in the store.
+///
+/// Each element holds the datoms of a single transaction; `transactions_after` builds the
+/// sequence in ascending `tx` order.
+pub struct Transactions(pub Vec<Datoms>);
+
+/// Label a transaction ID as the EDN symbol `?txN`, where `N` is the offset of `tx` from
+/// `bootstrap::TX0`.
+fn label_tx_id(tx: i64) -> edn::Value {
+    let offset = tx - bootstrap::TX0;
+    let symbol = symbols::PlainSymbol::new(format!("?tx{}", offset));
+    edn::Value::PlainSymbol(symbol)
+}
+
+/// Label a transaction instant as the EDN symbol `?msN`, where `N` is the offset of `tx` from
+/// `bootstrap::TX0`.
+fn label_tx_instant(tx: i64) -> edn::Value {
+    let offset = tx - bootstrap::TX0;
+    let symbol = symbols::PlainSymbol::new(format!("?ms{}", offset));
+    edn::Value::PlainSymbol(symbol)
+}
+
+impl Datom {
+    /// Render this datom as an EDN vector.
+    ///
+    /// An ordinary datom becomes `[e a v]`.  A `:db/txInstant` datom instead becomes
+    /// `[tx_id(tx) a tx_instant(tx)]`: the entity and value are replaced by the labels that the
+    /// given closures compute from the transaction ID (with the default labellers, `?txN` and
+    /// `?msN`).  When `added` is present, `tx_id(tx)` and the `added` flag are appended.
+    pub fn into_edn<T, U>(&self, tx_id: T, tx_instant: &U) -> edn::Value
+        where T: Fn(i64) -> edn::Value, U: Fn(i64) -> edn::Value {
+        // Numeric entids render as integers; idents render as namespaced keywords.
+        fn entid_to_edn(entid: &Entid) -> edn::Value {
+            match *entid {
+                Entid::Entid(ref number) => edn::Value::Integer(number.clone()),
+                Entid::Ident(ref keyword) => edn::Value::NamespacedKeyword(keyword.clone()),
+            }
+        }
+
+        // The attribute may be held numerically or as an ident, so check both forms.
+        let is_tx_instant = self.a == Entid::Entid(entids::DB_TX_INSTANT)
+            || self.a == Entid::Ident(to_namespaced_keyword(":db/txInstant").unwrap());
+
+        let mut elements = Vec::new();
+        if is_tx_instant {
+            elements.push(tx_id(self.tx));
+            elements.push(entid_to_edn(&self.a));
+            elements.push(tx_instant(self.tx));
+        } else {
+            elements.push(entid_to_edn(&self.e));
+            elements.push(entid_to_edn(&self.a));
+            elements.push(self.v.clone());
+        }
+
+        match self.added {
+            Some(added) => {
+                elements.push(tx_id(self.tx));
+                elements.push(edn::Value::Boolean(added));
+            },
+            None => {},
+        }
+
+        edn::Value::Vector(elements)
+    }
 }
 
-/// Return the complete set of datoms in the store, ordered by (e, a, v).
-pub fn datoms(conn: &rusqlite::Connection, db: &DB) -> Result<Vec<Datom>> {
-    // TODO: fewer magic numbers!
-    datoms_after(conn, db, &0x10000000)
+impl Datoms {
+    /// Render every datom in the set as EDN and gather the results into an EDN set, using the
+    /// given closures to label transaction IDs and transaction instants.
+    pub fn into_edn_raw<T, U>(&self, tx_id: &T, tx_instant: &U) -> edn::Value
+        where T: Fn(i64) -> edn::Value, U: Fn(i64) -> edn::Value {
+        let rendered = self.0.iter().map(|datom| datom.into_edn(tx_id, tx_instant)).collect();
+        edn::Value::Set(rendered)
+    }
+
+    /// Render the set as EDN using the default `?txN` / `?msN` labels.
+    pub fn into_edn(&self) -> edn::Value {
+        let id_labeller = &label_tx_id;
+        let instant_labeller = &label_tx_instant;
+        self.into_edn_raw(id_labeller, instant_labeller)
+    }
+}
+
+impl Transactions {
+    /// Render each transaction as an EDN set (via `Datoms::into_edn_raw`) and gather the results,
+    /// in order, into an EDN vector.
+    pub fn into_edn_raw<T, U>(&self, tx_id: &T, tx_instant: &U) -> edn::Value
+        where T: Fn(i64) -> edn::Value, U: Fn(i64) -> edn::Value {
+        let rendered = self.0.iter().map(|datoms| datoms.into_edn_raw(tx_id, tx_instant)).collect();
+        edn::Value::Vector(rendered)
+    }
+
+    /// Render the transactions as EDN using the default `?txN` / `?msN` labels.
+    pub fn into_edn(&self) -> edn::Value {
+        let id_labeller = &label_tx_id;
+        let instant_labeller = &label_tx_instant;
+        self.into_edn_raw(id_labeller, instant_labeller)
+    }
+}
+
+/// Look up `entid` in the schema and return it as an ident `Entid` when the schema has an ident
+/// for it that parses as a namespaced keyword; otherwise return the numeric `Entid` unchanged.
+fn to_entid(db: &DB, entid: i64) -> Entid {
+    let keyword = db.schema.get_ident(&entid).and_then(|ident| to_namespaced_keyword(&ident));
+    match keyword {
+        Some(keyword) => Entid::Ident(keyword),
+        None => Entid::Entid(entid),
+    }
 }
 
-/// Return the set of datoms in the store with transaction ID strictly
-/// greater than the given `tx`, ordered by (tx, e, a, v).
-pub fn datoms_after(conn: &rusqlite::Connection, db: &DB, tx: &i32) -> Result<Vec<Datom>> {
-    let mut stmt: rusqlite::Statement = conn.prepare("SELECT e, a, v, value_type_tag FROM datoms WHERE tx > ? ORDER BY tx, e, a, v")?;
+/// Return the set of datoms in the store whose transaction ID is at least `bootstrap::TX0`, not
+/// including any datoms of the form [... :db/txInstant ...].
+///
+/// This is `datoms_after` with the threshold placed just below the first transaction ID.
+pub fn datoms(conn: &rusqlite::Connection, db: &DB) -> Result<Datoms> {
+    let just_before_first_tx = bootstrap::TX0 - 1;
+    datoms_after(conn, db, just_before_first_tx)
+}
+
+/// Return the set of datoms in the store with transaction ID strictly greater than the given `tx`.
+///
+/// Rows are read from the `datoms` table ordered by (e, a, v, tx), but the result is a `Datoms`
+/// (a `BTreeSet`), so the final iteration order is `Datom`'s derived ordering over the converted
+/// values rather than the SQL order.
+///
+/// The datom set returned does not include any datoms of the form [... :db/txInstant ...].
+///
+/// # Errors
+///
+/// Returns an error if the statement cannot be prepared or executed, if a column cannot be read,
+/// or if `TypedValue::from_sql_value_pair` fails for a row's (value, type tag) pair.
+pub fn datoms_after(conn: &rusqlite::Connection, db: &DB, tx: i64) -> Result<Datoms> {
+    let mut stmt: rusqlite::Statement = conn.prepare("SELECT e, a, v, value_type_tag, tx FROM datoms WHERE tx > ? ORDER BY e ASC, a ASC, v ASC, tx ASC")?;
+
+    // Each row maps to `Some(datom)`, or to `None` when the row is to be dropped.
+    let r: Result<Vec<_>> = stmt.query_and_then(&[&tx], |row| {
+        let e: i64 = row.get_checked(0)?;
+        let a: i64 = row.get_checked(1)?;
+
+        // Skip :db/txInstant rows before decoding the value.
+        if a == entids::DB_TX_INSTANT {
+            return Ok(None);
+        }
+
+        let v: rusqlite::types::Value = row.get_checked(2)?;
+        let value_type_tag: i32 = row.get_checked(3)?;
+
+        // Decode the raw SQL value using its type tag, then convert it to EDN.
+        let typed_value = TypedValue::from_sql_value_pair(v, value_type_tag)?;
+        let (value, _) = typed_value.to_edn_value_pair();
+
+        let tx: i64 = row.get_checked(4)?;
+
+        Ok(Some(Datom {
+            e: to_entid(db, e),
+            a: to_entid(db, a),
+            v: value,
+            tx: tx,
+            // The query above does not select an `added` column.
+            added: None,
+        }))
+    })?.collect();
+
+    // Propagate the first row error, then discard the skipped (`None`) rows.
+    Ok(Datoms(r?.into_iter().filter_map(|x| x).collect()))
+}
 
-    // Convert numeric entid to entity Entid.
-    let to_entid = |x| {
-        db.schema.get_ident(&x).and_then(|y| to_namespaced_keyword(&y)).map(Entid::Ident).unwrap_or(Entid::Entid(x))
-    };
+/// Return the sequence of transactions in the store with transaction ID strictly greater than the
+/// given `tx`, ordered by (tx, e, a, v).
+///
+/// Each transaction returned includes the [:db/tx :db/txInstant ...] datom.
+///
+/// Rows are read from the `transactions` table, and every datom carries `Some(added)`.  Within a
+/// transaction the datoms are held in a `Datoms` (a `BTreeSet`), so their iteration order is
+/// `Datom`'s derived ordering rather than the SQL order.
+///
+/// # Errors
+///
+/// Returns an error if the statement cannot be prepared or executed, if a column cannot be read,
+/// or if `TypedValue::from_sql_value_pair` fails for a row's (value, type tag) pair.
+pub fn transactions_after(conn: &rusqlite::Connection, db: &DB, tx: i64) -> Result<Transactions> {
+    let mut stmt: rusqlite::Statement = conn.prepare("SELECT e, a, v, value_type_tag, tx, added FROM transactions WHERE tx > ? ORDER BY tx ASC, e ASC, a ASC, v ASC, added ASC")?;
 
-    let datoms = stmt.query_and_then(&[tx], |row| {
+    let r: Result<Vec<_>> = stmt.query_and_then(&[&tx], |row| {
         let e: i64 = row.get_checked(0)?;
         let a: i64 = row.get_checked(1)?;
+
         let v: rusqlite::types::Value = row.get_checked(2)?;
         let value_type_tag: i32 = row.get_checked(3)?;
 
-        let typed_value = TypedValue::from_sql_value_pair(v, &value_type_tag)?;
+        // Decode the raw SQL value using its type tag, then convert it to EDN.
+        let typed_value = TypedValue::from_sql_value_pair(v, value_type_tag)?;
         let (value, _) = typed_value.to_edn_value_pair();
 
+        let tx: i64 = row.get_checked(4)?;
+        let added: bool = row.get_checked(5)?;
+
         Ok(Datom {
-            e: to_entid(e),
-            a: to_entid(a),
+            e: to_entid(db, e),
+            a: to_entid(db, a),
             v: value,
-            tx: None,
+            tx: tx,
+            added: Some(added),
         })
     })?.collect();
-    datoms
+
+    // Group by tx.  `group_by` only groups consecutive items with equal keys; the query sorts by
+    // `tx` first, so each transaction forms exactly one group.
+    let r: Vec<Datoms> = r?.into_iter().group_by(|x| x.tx).into_iter().map(|(_key, group)| Datoms(group.collect())).collect();
+    Ok(Transactions(r))
+}
+
+/// Execute the given `sql` query with the given `params` and format the results as a
+/// tab-and-newline formatted string suitable for debug printing.
+///
+/// The query is printed followed by a newline, then the returned columns followed by a newline, and
+/// then the data rows and columns.  All columns are aligned.
+///
+/// Each cell is rendered with the `Debug` formatting of `rusqlite::types::Value`.
+///
+/// # Errors
+///
+/// Returns an error if the statement cannot be prepared or executed, or if a column value cannot
+/// be read.
+///
+/// # Panics
+///
+/// Panics if writing to the in-memory buffer fails, if the `TabWriter` cannot be unwrapped, or if
+/// the accumulated output is not valid UTF-8.
+pub fn dump_sql_query(conn: &rusqlite::Connection, sql: &str, params: &[&ToSql]) -> Result<String> {
+    let mut stmt: rusqlite::Statement = conn.prepare(sql)?;
+
+    // Accumulate output in an in-memory buffer; `TabWriter` aligns the tab-separated cells.
+    let mut tw = TabWriter::new(Vec::new()).padding(2);
+    write!(&mut tw, "{}\n", sql).unwrap();
+
+    // Header row: one tab-terminated cell per column name.
+    for column_name in stmt.column_names() {
+        write!(&mut tw, "{}\t", column_name).unwrap();
+    }
+    write!(&mut tw, "\n").unwrap();
+
+    // The row closure writes directly into `tw`; the collected values are only used to surface
+    // the first error.
+    let r: Result<Vec<_>> = stmt.query_and_then(params, |row| {
+        for i in 0..row.column_count() {
+            let value: rusqlite::types::Value = row.get_checked(i)?;
+            write!(&mut tw, "{:?}\t", value).unwrap();
+        }
+        write!(&mut tw, "\n").unwrap();
+        Ok(())
+    })?.collect();
+    r?;
+
+    let dump = String::from_utf8(tw.into_inner().unwrap()).unwrap();
+    Ok(dump)
 }
diff --git a/db/src/lib.rs b/db/src/lib.rs
@@ -10,14 +10,21 @@
 
 #[macro_use]
 extern crate error_chain;
+extern crate itertools;
 #[macro_use]
 extern crate lazy_static;
 extern crate rusqlite;
+extern crate time;
+
+extern crate tabwriter;
 
 extern crate edn;
 extern crate mentat_tx;
 extern crate mentat_tx_parser;
 
+use itertools::Itertools;
+use std::iter::repeat;
+
 pub use errors::*;
 pub use schema::*;
 pub use types::*;
@@ -33,6 +40,8 @@ mod values;
 
 use edn::symbols;
 
+/// Limit on the number of `?` bind variables in a single SQL statement.
+///
+/// `repeat_values` requires the total number of values to be strictly less than this.
+/// NOTE(review): 999 matches SQLite's historical default `SQLITE_MAX_VARIABLE_NUMBER` -- confirm
+/// against the bundled SQLite build.
+pub const SQLITE_MAX_VARIABLE_NUMBER: usize = 999;
+
 pub fn to_namespaced_keyword(s: &str) -> Option<symbols::NamespacedKeyword> {
     let splits = [':', '/'];
     let mut i = s.split(&splits[..]);
@@ -41,3 +50,26 @@ pub fn to_namespaced_keyword(s: &str) -> Option<symbols::NamespacedKeyword> {
         _ => None
     }
 }
+
+/// Prepare an SQL `VALUES` block, like (?, ?, ?), (?, ?, ?).
+///
+/// The number of values per tuple determines `(?, ?, ?)`.  The number of tuples determines `(...), (...)`.
+///
+/// # Panics
+///
+/// Panics if `values_per_tuple` or `tuples` is zero, or if the total number of values
+/// (`values_per_tuple * tuples`) is not strictly less than `SQLITE_MAX_VARIABLE_NUMBER`.  A product
+/// that overflows `usize` is treated as too many values.
+///
+/// # Examples
+///
+/// ```rust
+/// # use mentat_db::{repeat_values};
+/// assert_eq!(repeat_values(1, 3), "(?), (?), (?)".to_string());
+/// assert_eq!(repeat_values(3, 1), "(?, ?, ?)".to_string());
+/// assert_eq!(repeat_values(2, 2), "(?, ?), (?, ?)".to_string());
+/// ```
+pub fn repeat_values(values_per_tuple: usize, tuples: usize) -> String {
+    assert!(values_per_tuple >= 1);
+    assert!(tuples >= 1);
+    // Use `checked_mul`: a plain `*` wraps in release builds, which could let an absurdly large
+    // request slip past the limit check and then attempt a huge allocation below.
+    let within_limit = values_per_tuple.checked_mul(tuples).map_or(false, |total| total < SQLITE_MAX_VARIABLE_NUMBER);
+    assert!(within_limit, "Too many values: {} * {} >= {}", values_per_tuple, tuples, SQLITE_MAX_VARIABLE_NUMBER);
+    // Like "(?, ?, ?)".
+    let inner = format!("({})", repeat("?").take(values_per_tuple).join(", "));
+    // Like "(?, ?, ?), (?, ?, ?)".
+    repeat(inner).take(tuples).join(", ")
+}