mozilla · rnewman · Jan 11, 2017 · joewalker · Jan 12, 2017 · rnewman
diff --git a/edn/src/edn.rustpeg b/edn/src/edn.rustpeg
@@ -12,9 +12,11 @@
 
 use std::collections::{BTreeSet, BTreeMap, LinkedList};
 use std::iter::FromIterator;
+
 use num::BigInt;
-use types::Value;
 use ordered_float::OrderedFloat;
+use types;
+use types::Value;
 
 // Goal: Be able to parse https://github.com/edn-format/edn
 // Also extensible to help parse http://docs.datomic.com/query.html
@@ -71,23 +73,36 @@ text -> Value = "\"" t:$(  char* ) "\"" {
     Value::Text(t.to_string())
 }
 
+namespace_divider = "."
+namespace_separator = "/"
+
 // TODO: Be more picky here
-symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.]
-symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>/.] / "-"
+symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>]
+symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [-*!_?$%&=<>]
 
-#[export]
-symbol -> Value = s:$( symbol_char_initial symbol_char_subsequent* ) {
-    Value::Symbol(s.to_string())
-}
+symbol_namespace = symbol_char_initial+ (namespace_divider symbol_char_subsequent+)*
+symbol_name = ( symbol_char_initial+ / "." ) ( symbol_char_subsequent* / "." )
+
+keyword_prefix = ":"
 
-keyword_char_initial = ":"
 // TODO: More chars here?
-keyword_char_subsequent = [a-z] / [A-Z] / [0-9] / "/"
+keyword_namespace_char = [a-z] / [A-Z] / [0-9]
+keyword_namespace = keyword_namespace_char+ (namespace_divider keyword_namespace_char+)*
+
+keyword_name_char = [a-z] / [A-Z] / [0-9] / "."
+keyword_name = keyword_name_char+
 
 #[export]
-keyword -> Value = k:$( keyword_char_initial keyword_char_subsequent+ ) {
-    Value::Keyword(k.to_string())
-}
+symbol -> Value
+    = ns:( sns:$(symbol_namespace) namespace_separator { sns })? n:$(symbol_name) {
+        types::to_symbol(ns, n)
+    }
+
+#[export]
+keyword -> Value
+    = keyword_prefix ns:( kns:$(keyword_namespace) namespace_separator { kns })? n:$(keyword_name) {
+        types::to_keyword(ns, n)
+    }
 
 #[export]
 list -> Value = "(" __ v:(__ value)* __ ")" {

diff --git a/edn/src/keyword.rs b/edn/src/keyword.rs
diff --git a/edn/src/lib.rs b/edn/src/lib.rs
@@ -13,7 +13,7 @@
 extern crate ordered_float;
 extern crate num;
 
-pub mod keyword;
+pub mod symbols;
 pub mod types;
 
 pub mod parse {

diff --git a/edn/src/symbols.rs b/edn/src/symbols.rs
@@ -0,0 +1,164 @@
+// Copyright 2016 Mozilla
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
+// this file except in compliance with the License. You may obtain a copy of the
+// License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// A simplification of Clojure's Symbol: a bare name with no namespace part.
+#[derive(Clone,Debug,Eq,Hash,Ord,PartialOrd,PartialEq)]
+pub struct PlainSymbol(pub String);
+
+#[derive(Clone,Debug,Eq,Hash,Ord,PartialOrd,PartialEq)]
+pub struct NamespacedSymbol {
+    // The derived PartialOrd/Ord compare fields lexicographically in declaration
+    // order, so `namespace` comes first: values sort by namespace, then by name.
+    pub namespace: String,
+    pub name: String,
+}
+
+/// A keyword is a symbol, optionally with a namespace, that prints with a leading colon.
+/// This concept is imported from Clojure, as it features in EDN and the query
+/// syntax that we use.
+///
+/// Clojure's constraints are looser than ours, allowing empty namespaces or
+/// names:
+///
+/// ```clojure
+/// user=> (keyword "" "")
+/// :/
+/// user=> (keyword "foo" "")
+/// :foo/
+/// user=> (keyword "" "bar")
+/// :/bar
+/// ```
+///
+/// We think that's nonsense, so we only allow keywords like `:bar` and `:foo/bar`,
+/// with both namespace and main parts containing no whitespace and no colon or slash:
+///
+/// ```rust
+/// # use edn::symbols::Keyword;
+/// # use edn::symbols::NamespacedKeyword;
+/// let bar     = Keyword::new("bar");                         // :bar
+/// let foo_bar = NamespacedKeyword::new("foo", "bar");        // :foo/bar
+/// assert_eq!("bar", bar.0);
+/// assert_eq!("bar", foo_bar.name);
+/// assert_eq!("foo", foo_bar.namespace);
+/// ```
+///
+/// The constructors only assert that each part is non-empty; if you're not sure your input
+/// is well-formed, validate it with a parser or reader function first. TODO: implement `read`.
+///
+/// Callers are expected to follow these rules:
+/// http://www.clojure.org/reference/reader#_symbols
+///
+/// Future: fast equality (interning?) for keywords.
+///
+#[derive(Clone,Debug,Eq,Hash,Ord,PartialOrd,PartialEq)]
+pub struct Keyword(pub String);
+
+#[derive(Clone,Debug,Eq,Hash,Ord,PartialOrd,PartialEq)]
+pub struct NamespacedKeyword {
+    // The derived PartialOrd/Ord compare fields lexicographically in declaration
+    // order, so `namespace` comes first: values sort by namespace, then by name.
+    pub namespace: String,
+    pub name: String,
+}
+
+impl PlainSymbol {
+    /// Builds a plain symbol from `name`; panics if `name` is empty.
+    pub fn new(name: &str) -> Self {
+        assert!(!name.is_empty(), "Symbols cannot be unnamed.");
+        PlainSymbol(name.to_owned())
+    }
+}
+
+impl NamespacedSymbol {
+    /// Builds the symbol `namespace/name`; panics if either part is empty.
+    pub fn new(namespace: &str, name: &str) -> Self {
+        assert!(!name.is_empty(), "Symbols cannot be unnamed.");
+        assert!(!namespace.is_empty(), "Symbols cannot have an empty non-null namespace.");
+        NamespacedSymbol { namespace: namespace.to_owned(), name: name.to_owned() }
+    }
+}
+
+impl Keyword {
+    /// Builds a plain keyword from `name`; panics if `name` is empty.
+    pub fn new(name: &str) -> Self {
+        assert!(!name.is_empty(), "Keywords cannot be unnamed.");
+        Keyword(name.to_owned())
+    }
+}
+
+impl NamespacedKeyword {
+    /// Builds the keyword `:namespace/name`; panics if either part is empty.
+    pub fn new(namespace: &str, name: &str) -> Self {
+        assert!(!name.is_empty(), "Keywords cannot be unnamed.");
+        assert!(!namespace.is_empty(), "Keywords cannot have an empty non-null namespace.");
+        // TODO: debug asserts to ensure that neither field matches [ :/].
+        NamespacedKeyword { namespace: namespace.to_owned(), name: name.to_owned() }
+    }
+}
+
+//
+// Note that we don't currently do any escaping.
+//
+
+impl ::std::fmt::Display for PlainSymbol {
+    /// Write the symbol in EDN format; `to_string()` comes from the blanket `ToString` impl.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use edn::symbols::PlainSymbol;
+    /// assert_eq!("baz", PlainSymbol::new("baz").to_string());
+    /// ```
+    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl ::std::fmt::Display for NamespacedSymbol {
+    /// Write the symbol in EDN format; `to_string()` comes from the blanket `ToString` impl.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use edn::symbols::NamespacedSymbol;
+    /// assert_eq!("bar/baz", NamespacedSymbol::new("bar", "baz").to_string());
+    /// ```
+    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
+        write!(f, "{}/{}", self.namespace, self.name)
+    }
+}
+
+impl ::std::fmt::Display for Keyword {
+    /// Write the keyword in EDN format; `to_string()` comes from the blanket `ToString` impl.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use edn::symbols::Keyword;
+    /// assert_eq!(":baz", Keyword::new("baz").to_string());
+    /// ```
+    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
+        write!(f, ":{}", self.0)
+    }
+}
+
+impl ::std::fmt::Display for NamespacedKeyword {
+    /// Write the keyword in EDN format; `to_string()` comes from the blanket `ToString` impl.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use edn::symbols::NamespacedKeyword;
+    /// assert_eq!(":bar/baz", NamespacedKeyword::new("bar", "baz").to_string());
+    /// ```
+    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
+        write!(f, ":{}/{}", self.namespace, self.name)
+    }
+}
diff --git a/edn/src/types.rs b/edn/src/types.rs
@@ -10,6 +10,8 @@
 
 use std::collections::{BTreeSet, BTreeMap, LinkedList};
 use std::cmp::{Ordering, Ord, PartialOrd};
+
+use symbols;
 use num::BigInt;
 use ordered_float::OrderedFloat;
 
@@ -23,8 +25,10 @@ pub enum Value {
     // https://users.rust-lang.org/t/hashmap-key-cant-be-float-number-type-why/7892
     Float(OrderedFloat<f64>),
     Text(String),
-    Symbol(String),
-    Keyword(String),
+    PlainSymbol(symbols::PlainSymbol),
+    NamespacedSymbol(symbols::NamespacedSymbol),
+    Keyword(symbols::Keyword),
+    NamespacedKeyword(symbols::NamespacedKeyword),
     Vector(Vec<Value>),
     List(LinkedList<Value>),
     // We're using BTree{Set, Map} rather than Hash{Set, Map} because the BTree variants
@@ -55,8 +59,12 @@ impl Ord for Value {
             Integer(is)     => match *other { Integer(io)     => io.cmp(&is), _ => ord_order },
             Float(ref fs)   => match *other { Float(ref fo)   => fo.cmp(&fs), _ => ord_order },
             Text(ref ts)    => match *other { Text(ref to)    => to.cmp(&ts), _ => ord_order },
-            Symbol(ref ss)  => match *other { Symbol(ref so)  => so.cmp(&ss), _ => ord_order },
+            PlainSymbol(ref ss)  => match *other { PlainSymbol(ref so)  => so.cmp(&ss), _ => ord_order },
+            NamespacedSymbol(ref ss)
+                => match *other { NamespacedSymbol(ref so)    => so.cmp(&ss), _ => ord_order },
             Keyword(ref ks) => match *other { Keyword(ref ko) => ko.cmp(&ks), _ => ord_order },
+            NamespacedKeyword(ref ks)
+                => match *other { NamespacedKeyword(ref ko)   => ko.cmp(&ks), _ => ord_order },
             Vector(ref vs)  => match *other { Vector(ref vo)  => vo.cmp(&vs), _ => ord_order },
             List(ref ls)    => match *other { List(ref lo)    => lo.cmp(&ls), _ => ord_order },
             Set(ref ss)     => match *other { Set(ref so)     => so.cmp(&ss), _ => ord_order },
@@ -73,13 +81,29 @@ fn to_ord(value: &Value) -> i32 {
         BigInteger(_) => 3,
         Float(_) => 4,
         Text(_) => 5,
-        Symbol(_) => 6,
-        Keyword(_) => 7,
-        Vector(_) => 8,
-        List(_) => 9,
-        Set(_) => 10,
-        Map(_) => 12,
+        PlainSymbol(_) => 6,
+        NamespacedSymbol(_) => 7,
+        Keyword(_) => 8,
+        NamespacedKeyword(_) => 9,
+        Vector(_) => 10,
+        List(_) => 11,
+        Set(_) => 12,
+        Map(_) => 13,
     }
 }
 
 pub struct Pair(Value, Value);
+
+/// Wraps a parsed symbol as a `Value`: namespaced when `namespace` is present, else plain.
+pub fn to_symbol(namespace: Option<&str>, name: &str) -> Value {
+    namespace
+        .map(|ns| Value::NamespacedSymbol(symbols::NamespacedSymbol::new(ns, name)))
+        .unwrap_or_else(|| Value::PlainSymbol(symbols::PlainSymbol::new(name)))
+}
+
+/// Wraps a parsed keyword as a `Value`: namespaced when `namespace` is present, else plain.
+pub fn to_keyword(namespace: Option<&str>, name: &str) -> Value {
+    namespace
+        .map(|ns| Value::NamespacedKeyword(symbols::NamespacedKeyword::new(ns, name)))
+        .unwrap_or_else(|| Value::Keyword(symbols::Keyword::new(name)))
+}