Skip to content
This repository was archived by the owner on Sep 12, 2018. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ script:
- cargo test --verbose
- cargo test --verbose -p edn
- cargo test --verbose -p mentat_query_parser
- cargo test --verbose -p mentat_tx_parser
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ clap = "2.19.3"

[dependencies.mentat_query_parser]
path = "query-parser"

[dependencies.mentat_tx_parser]
path = "tx-parser"
2 changes: 1 addition & 1 deletion edn/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use num::BigInt;
use ordered_float::OrderedFloat;

/// Value represents one of the allowed values in an EDN string.
#[derive(PartialEq, Eq, Hash, Debug)]
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub enum Value {
Nil,
Boolean(bool),
Expand Down
12 changes: 12 additions & 0 deletions tx-parser/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "mentat_tx_parser"
version = "0.0.1"

[dependencies]
combine = "2.1.1"

[dependencies.edn]
path = "../edn"

[dependencies.mentat_tx]
path = "../tx"
337 changes: 337 additions & 0 deletions tx-parser/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,337 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#![allow(dead_code)]

extern crate edn;
extern crate combine;
extern crate mentat_tx;

use combine::{any, eof, many, optional, parser, satisfy_map, token, Parser, ParseResult, Stream};
use combine::combinator::{Expected, FnParser};
use edn::symbols::NamespacedKeyword;
use edn::types::Value;
use mentat_tx::entities::*;

/// Namespace type on which the transaction parsers are defined; `I` is the input stream type.
///
/// The only field is a `PhantomData`, so a `Tx` carries no data of its own.
pub struct Tx<I>(::std::marker::PhantomData<fn(I) -> I>);

/// Concrete parser type handed out by the `Tx` constructors: a function-pointer parser wrapped
/// in `Expected`. `O` is the parser's output type and `I` is the input stream type.
type TxParser<O, I> = Expected<FnParser<I, fn(I) -> ParseResult<O, I>>>;

/// Wraps the parsing function `f` as a parser and attaches `err` as its "expected" message.
fn fn_parser<O, I>(f: fn(I) -> ParseResult<O, I>, err: &'static str) -> TxParser<O, I>
    where I: Stream<Item = Value>
{
    let wrapped = parser(f);
    wrapped.expected(err)
}

impl<I> Tx<I>
where I: Stream<Item = Value>
{
fn integer() -> TxParser<i64, I> {
fn_parser(Tx::<I>::integer_, "integer")
}

/// Matches one `Value::Integer` item from `input` and yields the wrapped `i64`.
///
/// The closure returns `None` for every other `Value` variant, so those items do not match.
fn integer_(input: I) -> ParseResult<i64, I> {
return satisfy_map(|x: Value| if let Value::Integer(y) = x {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've read combinator.rs, and do not claim to understand it. But this is a nice abstraction for allowing failure to arbitrary depth.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

combinator.rs is tricky; it took me three days of digging to really understand what is happening :)

Some(y)
} else {
None
})
.parse_stream(input);
}

fn keyword() -> TxParser<NamespacedKeyword, I> {
fn_parser(Tx::<I>::keyword_, "keyword")
}

/// Matches one `Value::NamespacedKeyword` item from `input` and yields the wrapped keyword.
///
/// Every other `Value` variant maps to `None` and therefore does not match.
fn keyword_(input: I) -> ParseResult<NamespacedKeyword, I> {
    let mut matcher = satisfy_map(|candidate: Value| match candidate {
        Value::NamespacedKeyword(found) => Some(found),
        _ => None,
    });
    matcher.parse_stream(input)
}

fn entid() -> TxParser<Entid, I> {
fn_parser(Tx::<I>::entid_, "entid")
}

/// Parses an entity identifier: an integer becomes `Entid::Entid`, otherwise a namespaced
/// keyword becomes `Entid::Ident`. The integer alternative is tried first.
fn entid_(input: I) -> ParseResult<Entid, I> {
    let numeric = Tx::<I>::integer().map(Entid::Entid);
    let named = Tx::<I>::keyword().map(Entid::Ident);
    let mut either = numeric.or(named);
    either.parse_lazy(input).into()
}

fn lookup_ref() -> TxParser<LookupRef, I> {
fn_parser(Tx::<I>::lookup_ref_, "lookup-ref")
}

/// Matches one `Value::Vector` item whose contents are exactly an entid followed by any single
/// value, and yields them as a `LookupRef { a, v }`.
///
/// The vector's contents are parsed as their own `&[Value]` stream; the trailing `eof()`
/// rejects vectors with extra elements. Any failure of the inner parse is collapsed to `None`,
/// so the inner error details are not reported to the caller.
fn lookup_ref_(input: I) -> ParseResult<LookupRef, I> {
return satisfy_map(|x: Value| if let Value::Vector(y) = x {
let mut p = (Tx::<&[Value]>::entid(), any(), eof())
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the parser library memoize the construction of these parsers? Should I be worried about multiple invocations of fn_parser?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, and there's a subtle thing happening with consuming Parser instances that I don't yet understand. You'll note all these parsers are mutable; that's because they're consumed by parse and friends. So you can't avoid the invocations and allocations. It's unclear how this impacts performance overall.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because FnParsers (and probably other parsers, too) are implicitly stateful — they wrap a function, and there's no concept in Rust of a pure function?

(Indeed, with 'try' for lookahead, one expects LL(n) parser implementations to be stateful.)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the parser library memoize the construction of these parsers? Should I be worried about multiple invocations of fn_parser?

Author of combine here. Constructing parsers should be free/very cheap since Parser follow the same model as Iterator so its all stack allocations and since most parsers are either zero-sized or just a few bytes for the function or parameters they take to construct them. As long as LLVM inlines properly there should be zero overhead.

(Indeed, with 'try' for lookahead, one expects LL(n) parser implementations to be stateful.)

That is almost true but the try parser has no state itself, that is all contained in Parser::Input (try, the only thing it contains is the parser it wraps).

.map(|(a, v, _)| LookupRef { a: a, v: v });
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

p ensures that _ in this case will always be the eof terminal, yes?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. A tuple implements Parser based on the types of its elements, and the output of the tuple parser is the tupled output of its contained parsers.

let r = p.parse_lazy(&y[..]).into();
match r {
Ok((r, _)) => Some(r),
_ => None,
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You use this pattern elsewhere. At the risk of sending you down half an hour of wrestling with generics and lifetimes, is there a phrasing of this as:

fn result_to_option<T, E>(r: result<T, E>) -> Option<T> {
    match r {
        OK((r, _)) => Some(r),
        _ => None,
    }
}

that you can reuse?

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I eventually figured out how to extract the whole in-a-vector pattern, but it's really hard to express as function composition. The combine pattern (and, more generally, the Rust pattern -- I think) is to extract a struct that implements the Parser trait. I know how to do this but it's finicky. It'll be good to add such a function for query-parser, however, so I'll get to it eventually.

}
} else {
None
})
.parse_stream(input);
}

fn entid_or_lookup_ref() -> TxParser<EntidOrLookupRef, I> {
fn_parser(Tx::<I>::entid_or_lookup_ref_, "entid|lookup-ref")
}

/// Parses either an entid (wrapped as `EntidOrLookupRef::Entid`) or a lookup-ref (wrapped as
/// `EntidOrLookupRef::LookupRef`). The entid alternative is tried first.
fn entid_or_lookup_ref_(input: I) -> ParseResult<EntidOrLookupRef, I> {
    let plain = Tx::<I>::entid().map(EntidOrLookupRef::Entid);
    let reference = Tx::<I>::lookup_ref().map(EntidOrLookupRef::LookupRef);
    let mut either = plain.or(reference);
    either.parse_lazy(input).into()
}

// TODO: abstract the "match Vector, parse internal stream" pattern to remove this boilerplate.
/// Matches one `Value::Vector` item of the form `[:db/add e a v tx?]` and yields `Entity::Add`.
///
/// Inside the vector: the `:db/add` keyword, an entid or lookup-ref for `e`, an entid for `a`,
/// any single value for `v` (always wrapped as `ValueOrLookupRef::Value`), an optional entid
/// for `tx`, and then end of input. Any failure of the inner parse is collapsed to `None`, so
/// the inner error details are not reported to the caller.
fn add_(input: I) -> ParseResult<Entity, I> {
return satisfy_map(|x: Value| -> Option<Entity> {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I may, I believe you can change this parser into this to preserve errors from parsing the inner value.

satisfy_map(|x: Value| -> Option<Entity> {
 if let Value::Vector(y) = x { Some(y) } else { None }
}).flat_map(|y| {
    let  mut p = ...;
    p.parse(&y[..]).map(|t| t.0)
})

https://docs.rs/combine/2.1.1/combine/trait.Parser.html#method.flat_map

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I try this, I get errors like:

error[E0271]: type mismatch resolving `<parse::combine::combinator::SatisfyMap<&[error::edn::Value], [closure@query-parser/src/parse.rs:95:20: 95:94]> as parse::combine::Parser>::Input == I`
   --> query-parser/src/parse.rs:101:9
    |
101 |        .parse_stream(input)
    |         ^^^^^^^^^^^^ expected reference, found type parameter
    |
    = note: expected type `&[error::edn::Value]`
    = note:    found type `I`
    = note: required because of the requirements on the impl of `parse::combine::Parser` for `parse::combine::combinator::FlatMap<parse::combine::combinator::SatisfyMap<&[error::edn::Value], [closure@query-parser/src/parse.rs:95:20: 95:94]>, [closure@query-parser/src/parse.rs:96:22: 100:9]>`

so this isn't a trivial change.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. In order to do this, I needed to either use SliceStream or work with the ::Range type, so that the lifetime was preserved. It can be done, but it's awkward. See also the discussion in Marwes/combine#74 (comment).

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/me gives up, runs git checkout -- .

if let Value::Vector(y) = x {
let mut p = (token(Value::NamespacedKeyword(NamespacedKeyword::new("db",
"add"))),
Tx::<&[Value]>::entid_or_lookup_ref(),
Tx::<&[Value]>::entid(),
// TODO: handle lookup-ref.
any(),
// TODO: entid or special keyword :db/tx?
optional(Tx::<&[Value]>::entid()),
eof())
.map(|(_, e, a, v, tx, _)| {
Entity::Add {
e: e,
a: a,
v: ValueOrLookupRef::Value(v),
tx: tx,
}
});
// TODO: use ok() with a type annotation rather than explicit match.
match p.parse_lazy(&y[..]).into() {
Ok((r, _)) => Some(r),
_ => None,
}
} else {
None
}
})
.parse_stream(input);
}

fn add() -> TxParser<Entity, I> {
fn_parser(Tx::<I>::add_, "[:db/add e a v tx?]")
}

/// Matches one `Value::Vector` item of the form `[:db/retract e a v]` and yields
/// `Entity::Retract`.
///
/// The vector's contents are parsed as their own `&[Value]` stream. Any failure of that inner
/// parse is collapsed to `None`, so the inner error details are not reported to the caller.
fn retract_(input: I) -> ParseResult<Entity, I> {
    let mut vector_parser = satisfy_map(|x: Value| -> Option<Entity> {
        // Only a vector can hold a retraction; every other value is a non-match.
        let items = match x {
            Value::Vector(items) => items,
            _ => return None,
        };
        let op = Value::NamespacedKeyword(NamespacedKeyword::new("db", "retract"));
        let mut inner = (token(op),
                         Tx::<&[Value]>::entid_or_lookup_ref(),
                         Tx::<&[Value]>::entid(),
                         // TODO: handle lookup-ref.
                         any(),
                         eof())
            .map(|(_, e, a, v, _)| {
                Entity::Retract {
                    e: e,
                    a: a,
                    v: ValueOrLookupRef::Value(v),
                }
            });
        let outcome: ParseResult<Entity, &[Value]> = inner.parse_lazy(&items[..]).into();
        outcome.ok().map(|(entity, _)| entity)
    });
    vector_parser.parse_stream(input)
}

fn retract() -> TxParser<Entity, I> {
fn_parser(Tx::<I>::retract_, "[:db/retract e a v]")
}

/// Matches one `Value::Vector` item of the form `[:db/retractAttribute e a]` and yields
/// `Entity::RetractAttribute`.
///
/// The vector's contents are parsed as their own `&[Value]` stream. Any failure of that inner
/// parse is collapsed to `None`, so the inner error details are not reported to the caller.
fn retract_attribute_(input: I) -> ParseResult<Entity, I> {
    let mut vector_parser = satisfy_map(|x: Value| -> Option<Entity> {
        // Only a vector can hold this form; every other value is a non-match.
        let items = match x {
            Value::Vector(items) => items,
            _ => return None,
        };
        let op = Value::NamespacedKeyword(NamespacedKeyword::new("db", "retractAttribute"));
        let mut inner = (token(op),
                         Tx::<&[Value]>::entid_or_lookup_ref(),
                         Tx::<&[Value]>::entid(),
                         eof())
            .map(|(_, e, a, _)| Entity::RetractAttribute { e: e, a: a });
        let outcome: ParseResult<Entity, &[Value]> = inner.parse_lazy(&items[..]).into();
        outcome.ok().map(|(entity, _)| entity)
    });
    vector_parser.parse_stream(input)
}

fn retract_attribute() -> TxParser<Entity, I> {
fn_parser(Tx::<I>::retract_attribute_, "[:db/retractAttribute e a]")
}

/// Matches one `Value::Vector` item of the form `[:db/retractEntity e]` and yields
/// `Entity::RetractEntity`.
///
/// Inside the vector: the `:db/retractEntity` keyword, an entid or lookup-ref for `e`, and
/// then end of input. Any failure of the inner parse is collapsed to `None`, so the inner
/// error details are not reported to the caller.
fn retract_entity_(input: I) -> ParseResult<Entity, I> {
return satisfy_map(|x: Value| -> Option<Entity> {
if let Value::Vector(y) = x {
let mut p =
(token(Value::NamespacedKeyword(NamespacedKeyword::new("db",
"retractEntity"))),
Tx::<&[Value]>::entid_or_lookup_ref(),
eof())
.map(|(_, e, _)| Entity::RetractEntity { e: e });
// TODO: use ok() with a type annotation rather than explicit match.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might render my earlier point moot.

match p.parse_lazy(&y[..]).into() {
Ok((r, _)) => Some(r),
_ => None,
}
} else {
None
}
})
.parse_stream(input);
}

fn retract_entity() -> TxParser<Entity, I> {
fn_parser(Tx::<I>::retract_entity_, "[:db/retractEntity e]")
}

/// Parses a single transaction entity by trying each form in order: add, retract,
/// retract-attribute, then retract-entity.
fn entity_(input: I) -> ParseResult<Entity, I> {
let mut p = Tx::<I>::add()
.or(Tx::<I>::retract())
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really nice.

.or(Tx::<I>::retract_attribute())
.or(Tx::<I>::retract_entity());
p.parse_stream(input)
}

fn entity() -> TxParser<Entity, I> {
fn_parser(Tx::<I>::entity_,
"[:db/add|:db/retract|:db/retractAttribute|:db/retractEntity ...]")
}

/// Matches one `Value::Vector` item whose contents are zero or more entities followed by end
/// of input, and yields them as a `Vec<Entity>`.
///
/// The vector's contents are parsed as their own `&[Value]` stream. Any failure of that inner
/// parse is collapsed to `None`, so the inner error details are not reported to the caller.
fn entities_(input: I) -> ParseResult<Vec<Entity>, I> {
    let mut vector_parser = satisfy_map(|x: Value| -> Option<Vec<Entity>> {
        // Only a vector can hold the entity list; every other value is a non-match.
        let items = match x {
            Value::Vector(items) => items,
            _ => return None,
        };
        let mut inner = (many(Tx::<&[Value]>::entity()), eof()).map(|(es, _)| es);
        let outcome: ParseResult<Vec<Entity>, &[Value]> = inner.parse_lazy(&items[..]).into();
        outcome.ok().map(|(entities, _)| entities)
    });
    vector_parser.parse_stream(input)
}

fn entities() -> TxParser<Vec<Entity>, I> {
fn_parser(Tx::<I>::entities_,
"[[:db/add|:db/retract|:db/retractAttribute|:db/retractEntity ...]*]")
}

/// Parses `input` as one vector of transaction entities followed by end of input.
///
/// Returns the parsed entities on success, or the `combine::ParseError` produced by the
/// parser on failure.
pub fn parse(input: I) -> Result<Vec<Entity>, combine::ParseError<I>> {
    let mut document = (Tx::<I>::entities(), eof()).map(|(es, _)| es);
    match document.parse(input) {
        // The second tuple element is the remaining input; callers only need the entities.
        Ok((entities, _remaining)) => Ok(entities),
        Err(err) => Err(err),
    }
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use combine::Parser;
    use edn::symbols::NamespacedKeyword;
    use edn::types::Value;
    use mentat_tx::entities::*;

    /// Builds a namespaced-keyword EDN value such as `:db/add`.
    fn kw(namespace: &str, name: &str) -> Value {
        Value::NamespacedKeyword(NamespacedKeyword::new(namespace, name))
    }

    /// Builds the `Entid::Ident` that a namespaced keyword is expected to parse into.
    fn ident(namespace: &str, name: &str) -> Entid {
        Entid::Ident(NamespacedKeyword::new(namespace, name))
    }

    /// Builds a text EDN value.
    fn text(s: &str) -> Value {
        Value::Text(s.into())
    }

    #[test]
    fn test_add() {
        let form = vec![kw("db", "add"), kw("test", "entid"), kw("test", "a"), text("v")];
        let input = [Value::Vector(form)];
        let expected = Entity::Add {
            e: EntidOrLookupRef::Entid(ident("test", "entid")),
            a: ident("test", "a"),
            v: ValueOrLookupRef::Value(text("v")),
            tx: None,
        };

        let mut parser = Tx::entity();
        let result = parser.parse(&input[..]);
        assert_eq!(result, Ok((expected, &[][..])));
    }

    #[test]
    fn test_retract() {
        let form = vec![kw("db", "retract"), Value::Integer(101), kw("test", "a"), text("v")];
        let input = [Value::Vector(form)];
        let expected = Entity::Retract {
            e: EntidOrLookupRef::Entid(Entid::Entid(101)),
            a: ident("test", "a"),
            v: ValueOrLookupRef::Value(text("v")),
        };

        let mut parser = Tx::entity();
        let result = parser.parse(&input[..]);
        assert_eq!(result, Ok((expected, &[][..])));
    }

    #[test]
    fn test_lookup_ref() {
        // The entity position holds a nested `[attribute value]` vector, i.e. a lookup-ref.
        let nested = Value::Vector(vec![kw("test", "a1"), text("v1")]);
        let form = vec![kw("db", "add"), nested, kw("test", "a"), text("v")];
        let input = [Value::Vector(form)];
        let expected = Entity::Add {
            e: EntidOrLookupRef::LookupRef(LookupRef {
                a: ident("test", "a1"),
                v: text("v1"),
            }),
            a: ident("test", "a"),
            v: ValueOrLookupRef::Value(text("v")),
            tx: None,
        };

        let mut parser = Tx::entity();
        let result = parser.parse(&input[..]);
        assert_eq!(result, Ok((expected, &[][..])));
    }
}
Loading