Skip to content
This repository was archived by the owner on Sep 12, 2018. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 27 additions & 12 deletions edn/src/edn.rustpeg
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@

use std::collections::{BTreeSet, BTreeMap, LinkedList};
use std::iter::FromIterator;

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something I miss from Java is a rough standard of individual imports, sorted alphabetically, inserted by some refactoring and automatically folded by the editor. Minimal code churn which the developer doesn't need to think about.
I suspect that rust tooling will catch up shortly. So nothing to do now, mostly thinking aloud.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I decided to do much the same as what we've been doing in Swift and Java: split std out, sort all alphabetically within that division.

use num::BigInt;
use types::Value;
use ordered_float::OrderedFloat;
use types;
use types::Value;

// Goal: Be able to parse https://github.com/edn-format/edn
// Also extensible to help parse http://docs.datomic.com/query.html
Expand Down Expand Up @@ -71,23 +73,36 @@ text -> Value = "\"" t:$( char* ) "\"" {
Value::Text(t.to_string())
}

namespace_divider = "."
namespace_separator = "/"

// TODO: Be more picky here
symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.]
symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.] / "-"
symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>]
symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [-*!_?$%&=<>]

#[export]
symbol -> Value = s:$( symbol_char_initial symbol_char_subsequent* ) {
Value::Symbol(s.to_string())
}
symbol_namespace = symbol_char_initial+ (namespace_divider symbol_char_subsequent+)*
symbol_name = ( symbol_char_initial+ / "." ) ( symbol_char_subsequent* / "." )

keyword_prefix = ":"

keyword_char_initial = ":"
// TODO: More chars here?
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this comment still relevant? _, *, and - seem like good name{space} chars...

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I decided not to attack that yet. Start narrow and broaden later.

keyword_char_subsequent = [a-z] / [A-Z] / [0-9] / "/"
keyword_namespace_char = [a-z] / [A-Z] / [0-9]
keyword_namespace = keyword_namespace_char+ (namespace_divider keyword_namespace_char+)*

keyword_name_char = [a-z] / [A-Z] / [0-9] / "."
keyword_name = keyword_name_char+

#[export]
keyword -> Value = k:$( keyword_char_initial keyword_char_subsequent+ ) {
Value::Keyword(k.to_string())
}
symbol -> Value
= ns:( sns:$(symbol_namespace) namespace_separator { sns })? n:$(symbol_name) {
types::to_symbol(ns, n)
}

#[export]
keyword -> Value
= keyword_prefix ns:( kns:$(keyword_namespace) namespace_separator { kns })? n:$(keyword_name) {
types::to_keyword(ns, n)
}

#[export]
list -> Value = "(" __ v:(__ value)* __ ")" {
Expand Down
86 changes: 0 additions & 86 deletions edn/src/keyword.rs

This file was deleted.

2 changes: 1 addition & 1 deletion edn/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
extern crate ordered_float;
extern crate num;

pub mod keyword;
pub mod symbols;
pub mod types;

pub mod parse {
Expand Down
164 changes: 164 additions & 0 deletions edn/src/symbols.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

/// A symbol that has a name but no namespace, such as `foo`.
///
/// This is a simplification of Clojure's Symbol: the single public field
/// holds the symbol's name.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct PlainSymbol(pub String);

/// A symbol qualified by a namespace, such as `foo/bar`.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct NamespacedSymbol {
    // The derived `PartialOrd`/`Ord` compare fields lexicographically in
    // declaration order, so `namespace` must stay ahead of `name`.
    pub namespace: String,
    pub name: String,
}

/// A keyword without a namespace, such as `:bar`.
///
/// A keyword is a symbol, optionally with a namespace, that prints with a
/// leading colon. The concept comes from Clojure; it appears in EDN and in the
/// query syntax that we use. The namespaced form is `NamespacedKeyword`.
///
/// Clojure's constraints are looser than ours: it allows empty namespaces or
/// names.
///
/// ```clojure
/// user=> (keyword "" "")
/// :/
/// user=> (keyword "foo" "")
/// :foo/
/// user=> (keyword "" "bar")
/// :/bar
/// ```
///
/// We think that's nonsense, so we only allow keywords like `:bar` and
/// `:foo/bar`, where both the namespace and the main part contain no
/// whitespace and no colon or slash:
///
/// ```rust
/// # use edn::symbols::Keyword;
/// # use edn::symbols::NamespacedKeyword;
/// let bar     = Keyword::new("bar");                         // :bar
/// let foo_bar = NamespacedKeyword::new("foo", "bar");        // :foo/bar
/// assert_eq!("bar", bar.0);
/// assert_eq!("bar", foo_bar.name);
/// assert_eq!("foo", foo_bar.namespace);
/// ```
///
/// If you're not sure whether your input is well-formed, validate it with a
/// parser or a reader function first. TODO: implement `read`.
///
/// Callers are expected to follow these rules:
/// http://www.clojure.org/reference/reader#_symbols
///
/// Future: fast equality (interning?) for keywords.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Keyword(pub String);

/// A keyword qualified by a namespace, such as `:foo/bar`.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct NamespacedKeyword {
    // The derived `PartialOrd`/`Ord` compare fields lexicographically in
    // declaration order, so `namespace` must stay ahead of `name`.
    pub namespace: String,
    pub name: String,
}

impl PlainSymbol {
    /// Builds a symbol from `name`, copying it into an owned `String`.
    ///
    /// # Panics
    ///
    /// Panics if `name` is empty.
    pub fn new(name: &str) -> Self {
        assert!(!name.is_empty(), "Symbols cannot be unnamed.");
        PlainSymbol(String::from(name))
    }
}

impl NamespacedSymbol {
    /// Builds a symbol from `namespace` and `name`, copying both into owned
    /// `String`s.
    ///
    /// # Panics
    ///
    /// Panics if `name` is empty, or if `namespace` is empty. The name is
    /// checked first.
    pub fn new(namespace: &str, name: &str) -> Self {
        assert!(!name.is_empty(), "Symbols cannot be unnamed.");
        assert!(!namespace.is_empty(), "Symbols cannot have an empty non-null namespace.");

        NamespacedSymbol {
            namespace: String::from(namespace),
            name: String::from(name),
        }
    }
}

impl Keyword {
    /// Builds a keyword from `name`, copying it into an owned `String`.
    ///
    /// `name` is stored as given, without a leading colon.
    ///
    /// # Panics
    ///
    /// Panics if `name` is empty.
    pub fn new(name: &str) -> Self {
        assert!(!name.is_empty(), "Keywords cannot be unnamed.");
        Keyword(String::from(name))
    }
}

impl NamespacedKeyword {
    /// Builds a keyword from `namespace` and `name`, copying both into owned
    /// `String`s.
    ///
    /// # Panics
    ///
    /// Panics if `name` is empty, or if `namespace` is empty. The name is
    /// checked first.
    pub fn new(namespace: &str, name: &str) -> Self {
        assert!(!name.is_empty(), "Keywords cannot be unnamed.");
        assert!(!namespace.is_empty(), "Keywords cannot have an empty non-null namespace.");

        // TODO: debug asserts to ensure that neither field matches [ :/].
        NamespacedKeyword {
            namespace: String::from(namespace),
            name: String::from(name),
        }
    }
}

//
// Note that we don't currently do any escaping.
//

// `Display` is implemented rather than `ToString`: the standard library's
// blanket `impl<T: Display> ToString for T` keeps `.to_string()` working and
// additionally lets the symbol be used with `{}` in formatting macros.
impl ::std::fmt::Display for PlainSymbol {
    /// Print the symbol in EDN format: the bare name, with no escaping.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use edn::symbols::PlainSymbol;
    /// assert_eq!("baz", PlainSymbol::new("baz").to_string());
    /// assert_eq!("baz", format!("{}", PlainSymbol::new("baz")));
    /// ```
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        f.write_str(&self.0)
    }
}

// `Display` is implemented rather than `ToString`: the standard library's
// blanket `impl<T: Display> ToString for T` keeps `.to_string()` working and
// additionally lets the symbol be used with `{}` in formatting macros.
impl ::std::fmt::Display for NamespacedSymbol {
    /// Print the symbol in EDN format: `namespace/name`, with no escaping.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use edn::symbols::NamespacedSymbol;
    /// assert_eq!("bar/baz", NamespacedSymbol::new("bar", "baz").to_string());
    /// assert_eq!("bar/baz", format!("{}", NamespacedSymbol::new("bar", "baz")));
    /// ```
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        write!(f, "{}/{}", self.namespace, self.name)
    }
}

// `Display` is implemented rather than `ToString`: the standard library's
// blanket `impl<T: Display> ToString for T` keeps `.to_string()` working and
// additionally lets the keyword be used with `{}` in formatting macros.
impl ::std::fmt::Display for Keyword {
    /// Print the keyword in EDN format: a leading colon followed by the name,
    /// with no escaping.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use edn::symbols::Keyword;
    /// assert_eq!(":baz", Keyword::new("baz").to_string());
    /// assert_eq!(":baz", format!("{}", Keyword::new("baz")));
    /// ```
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        write!(f, ":{}", self.0)
    }
}

// `Display` is implemented rather than `ToString`: the standard library's
// blanket `impl<T: Display> ToString for T` keeps `.to_string()` working and
// additionally lets the keyword be used with `{}` in formatting macros.
impl ::std::fmt::Display for NamespacedKeyword {
    /// Print the keyword in EDN format: `:namespace/name`, with no escaping.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use edn::symbols::NamespacedKeyword;
    /// assert_eq!(":bar/baz", NamespacedKeyword::new("bar", "baz").to_string());
    /// assert_eq!(":bar/baz", format!("{}", NamespacedKeyword::new("bar", "baz")));
    /// ```
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        write!(f, ":{}/{}", self.namespace, self.name)
    }
}
42 changes: 33 additions & 9 deletions edn/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

use std::collections::{BTreeSet, BTreeMap, LinkedList};
use std::cmp::{Ordering, Ord, PartialOrd};

use symbols;
use num::BigInt;
use ordered_float::OrderedFloat;

Expand All @@ -23,8 +25,10 @@ pub enum Value {
// https://users.rust-lang.org/t/hashmap-key-cant-be-float-number-type-why/7892
Float(OrderedFloat<f64>),
Text(String),
Symbol(String),
Keyword(String),
PlainSymbol(symbols::PlainSymbol),
NamespacedSymbol(symbols::NamespacedSymbol),
Keyword(symbols::Keyword),
NamespacedKeyword(symbols::NamespacedKeyword),
Vector(Vec<Value>),
List(LinkedList<Value>),
// We're using BTree{Set, Map} rather than Hash{Set, Map} because the BTree variants
Expand Down Expand Up @@ -55,8 +59,12 @@ impl Ord for Value {
Integer(is) => match *other { Integer(io) => io.cmp(&is), _ => ord_order },
Float(ref fs) => match *other { Float(ref fo) => fo.cmp(&fs), _ => ord_order },
Text(ref ts) => match *other { Text(ref to) => to.cmp(&ts), _ => ord_order },
Symbol(ref ss) => match *other { Symbol(ref so) => so.cmp(&ss), _ => ord_order },
PlainSymbol(ref ss) => match *other { PlainSymbol(ref so) => so.cmp(&ss), _ => ord_order },
NamespacedSymbol(ref ss)
=> match *other { NamespacedSymbol(ref so) => so.cmp(&ss), _ => ord_order },
Keyword(ref ks) => match *other { Keyword(ref ko) => ko.cmp(&ks), _ => ord_order },
NamespacedKeyword(ref ks)
=> match *other { NamespacedKeyword(ref ko) => ko.cmp(&ks), _ => ord_order },
Vector(ref vs) => match *other { Vector(ref vo) => vo.cmp(&vs), _ => ord_order },
List(ref ls) => match *other { List(ref lo) => lo.cmp(&ls), _ => ord_order },
Set(ref ss) => match *other { Set(ref so) => so.cmp(&ss), _ => ord_order },
Expand All @@ -73,13 +81,29 @@ fn to_ord(value: &Value) -> i32 {
BigInteger(_) => 3,
Float(_) => 4,
Text(_) => 5,
Symbol(_) => 6,
Keyword(_) => 7,
Vector(_) => 8,
List(_) => 9,
Set(_) => 10,
Map(_) => 12,
PlainSymbol(_) => 6,
NamespacedSymbol(_) => 7,
Keyword(_) => 8,
NamespacedKeyword(_) => 9,
Vector(_) => 10,
List(_) => 11,
Set(_) => 12,
Map(_) => 13,
}
}

/// A tuple struct holding two `Value`s; both fields are private.
// NOTE(review): nothing in the visible diff constructs or reads `Pair` —
// presumably it is meant for key/value entries; confirm against callers.
pub struct Pair(Value, Value);

/// Wraps a parsed symbol in the matching `Value` variant.
///
/// Returns `Value::NamespacedSymbol` when `namespace` is `Some`, and
/// `Value::PlainSymbol` otherwise. Construction is delegated to the
/// `symbols` constructors.
pub fn to_symbol(namespace: Option<&str>, name: &str) -> Value {
    match namespace {
        Some(ns) => Value::NamespacedSymbol(symbols::NamespacedSymbol::new(ns, name)),
        None => Value::PlainSymbol(symbols::PlainSymbol::new(name)),
    }
}

/// Wraps a parsed keyword in the matching `Value` variant.
///
/// Returns `Value::NamespacedKeyword` when `namespace` is `Some`, and
/// `Value::Keyword` otherwise. Construction is delegated to the `symbols`
/// constructors.
pub fn to_keyword(namespace: Option<&str>, name: &str) -> Value {
    match namespace {
        Some(ns) => Value::NamespacedKeyword(symbols::NamespacedKeyword::new(ns, name)),
        None => Value::Keyword(symbols::Keyword::new(name)),
    }
}
Loading