From 428b99547ca167f2b93b69c67b0cc81a71128b3d Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Wed, 24 May 2017 23:29:17 -0600 Subject: [PATCH 01/48] WIP code point based string functions --- src/Data/String/CodePoints.js | 71 +++++++++++++++++++++++++++++++++ src/Data/String/CodePoints.purs | 66 ++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 src/Data/String/CodePoints.js create mode 100644 src/Data/String/CodePoints.purs diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js new file mode 100644 index 0000000..e4e56cf --- /dev/null +++ b/src/Data/String/CodePoints.js @@ -0,0 +1,71 @@ +const hasArrayFrom = typeof Array.from === 'function'; +const hasStringIterator = + typeof Symbol !== 'undefined' && + Symbol != null && + typeof Symbol.iterator !== 'undefined' && + typeof String.prototype[Symbol.iterator] === 'function'; + +exports._codePointAt = function (fallback) { + return function (Just) { + return function (Nothing) { + return function (relIndex) { + return function (str) { + let length = str.length; + if (length <= relIndex) return Nothing; + let index = relIndex < 0 ? ((relIndex % length) + length) % length : relIndex; + if (typeof String.prototype.codePointAt === 'function') { + let cp = str.codePointAt(index); + return cp == null ? Nothing : Just(cp); + } else if (hasArrayFrom) { + let cps = Array.from(str); + if (cps.length <= index) return Nothing; + return Just(cps[index]); + } else if (hasStringIterator) { + let iter = str[Symbol.iterator](); + for (;;) { + let { value, done } = iter.next(); + if (done) return Nothing; + if (i == 0) return Just(value); + --i; + } + } + return fallback(index)(str); + }; + }; + }; + }; +}; + +exports._toCodePointArray = function (str) { + if (hasArrayFrom) { + return Array.from(str); + } else if (hasStringIterator) { + let accum = []; + let iter = str[Symbol.iterator](); + for (;;) { + let { value, done } = iter.next(); + if (done) return accum; + accum.push(value); + } + } + let accum = []; + for (let cuCount = 0; cuCount < str.length; ++cuCount) { + let cu = str[cuCount]; + let cp = cu; + if (isLead(cu) && cuCount + 1 < str.length) { + let lead = cu; + let trail = str[cuCount + 1]; + if (isTrail(trail)) { + cp = unsurrogate(lead, trail); + } + } + accum.push(cp); + } + return accum; +}; + +function isLead(cu) { return 0xD800 <= cu && cu <= 0xDBFF; } +function isTrail(cu) { return 0xDC00 <= cu && cu <= 0xDFFF; } +function unsurrogate(h, l) { + return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000; +} diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs new file mode 100644 index 0000000..7176282 --- /dev/null +++ b/src/Data/String/CodePoints.purs @@ -0,0 +1,66 @@ +module CodePoints + ( CodePoint() + --, Pattern() + --, codePointAt + --, fromCodePointArray + --, contains + --, indexOf + --, indexOf' + --, lastIndexOf + --, lastIndexOf' + --, uncons + --, length + --, singleton + --, replace + --, replaceAll + --, take + --, takeWhile + --, drop + --, dropWhile + --, stripPrefix + --, stripSuffix + --, count + --, split + --, splitAt + --, toCodePointArray + ) where + +import Prelude ((&&), (<=), (*), (+), (-)) +import Data.Maybe (Maybe(Just, Nothing)) + +newtype CodePoint = CodePoint Int + +codePointFromInt :: Int -> Maybe CodePoint +codePointFromInt n | 0 <= n && n <= 0x10FFFF = Just (CodePoint n) +codePointFromInt n = Nothing + +codePointToInt :: CodePoint -> Int +codePointToInt (CodePoint n) = n + +codePointFromSurrogatePair :: Int -> Int -> Maybe CodePoint +codePointFromSurrogatePair lead trail | isLead lead && isTrail trail + = Just (CodePoint (unsurrogate lead trail)) + where unsurrogate h l = (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000 +codePointFromSurrogatePair _ _ = Nothing + +isLead :: Int -> Boolean +isLead cu = 0xD800 <= cu && cu <= 0xDBFF + +isTrail :: Int -> Boolean +isTrail cu = 0xDC00 <= cu && cu <= 0xDFFF + +codePointAt :: Int -> String -> Maybe CodePoint +codePointAt = _codePointAt (Just . CodePoint) Nothing + +foreign import _codePointAt + :: (Int -> String -> Maybe CodePoint) + -> (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> Int + -> String + -> Maybe CodePoint + +codePointAtFallback :: Int -> String -> Maybe CodePoint +codePointAtFallback n s = CodePoint <$> index (toCodePointArray s) n + +foreign import _toCodePointArray :: String -> Array CodePoint From dc0577c026065b722ea86c45b22ffaf59f4016c4 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Thu, 25 May 2017 00:16:56 -0600 Subject: [PATCH 02/48] more progress --- bower.json | 5 ++- src/Data/String/CodePoints.js | 63 ++++++++++++--------------------- src/Data/String/CodePoints.purs | 37 +++++++++++++++---- 3 files changed, 57 insertions(+), 48 deletions(-) diff --git a/bower.json b/bower.json index e92fb95..a1fa92d 100644 --- a/bower.json +++ b/bower.json @@ -20,7 +20,10 @@ "purescript-either": "^3.0.0", "purescript-gen": "^1.1.0", "purescript-maybe": "^3.0.0", - "purescript-partial": "^1.2.0" + "purescript-partial": "^1.2.0", + "purescript-unfoldable": "^3.0.0", + "purescript-lists": "^4.1.1", + "purescript-arrays": "^4.0.1" }, "devDependencies": { "purescript-assert": "^3.0.0", diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index e4e56cf..16d499d 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -1,5 +1,5 @@ -const hasArrayFrom = typeof Array.from === 'function'; -const hasStringIterator = +var hasArrayFrom = typeof Array.from === 'function'; +var hasStringIterator = typeof Symbol !== 'undefined' && Symbol != null && typeof Symbol.iterator !== 'undefined' && @@ -10,22 +10,22 @@ exports._codePointAt = function (fallback) { return function (Nothing) { return function (relIndex) { return function (str) { - let length = str.length; + var length = str.length; if (length <= relIndex) return Nothing; - let index = relIndex < 0 ? ((relIndex % length) + length) % length : relIndex; + var index = relIndex < 0 ? ((relIndex % length) + length) % length : relIndex; if (typeof String.prototype.codePointAt === 'function') { - let cp = str.codePointAt(index); + var cp = str.codePointAt(index); return cp == null ? Nothing : Just(cp); } else if (hasArrayFrom) { - let cps = Array.from(str); + var cps = Array.from(str); if (cps.length <= index) return Nothing; return Just(cps[index]); } else if (hasStringIterator) { - let iter = str[Symbol.iterator](); + var iter = str[Symbol.iterator](); for (;;) { - let { value, done } = iter.next(); - if (done) return Nothing; - if (i == 0) return Just(value); + var o = iter.next(); + if (o.done) return Nothing; + if (i == 0) return Just(o.value); --i; } } @@ -36,36 +36,19 @@ exports._codePointAt = function (fallback) { }; }; -exports._toCodePointArray = function (str) { - if (hasArrayFrom) { - return Array.from(str); - } else if (hasStringIterator) { - let accum = []; - let iter = str[Symbol.iterator](); - for (;;) { - let { value, done } = iter.next(); - if (done) return accum; - accum.push(value); - } - } - let accum = []; - for (let cuCount = 0; cuCount < str.length; ++cuCount) { - let cu = str[cuCount]; - let cp = cu; - if (isLead(cu) && cuCount + 1 < str.length) { - let lead = cu; - let trail = str[cuCount + 1]; - if (isTrail(trail)) { - cp = unsurrogate(lead, trail); +exports._toCodePointArray = function (fallback) { + return function (str) { + if (hasArrayFrom) { + return Array.from(str); + } else if (hasStringIterator) { + var accum = []; + var iter = str[Symbol.iterator](); + for (;;) { + var o = iter.next(); + if (o.done) return accum; + accum.push(o.value); } } - accum.push(cp); - } - return accum; + return fallback(str); + }; }; - -function isLead(cu) { return 0xD800 <= cu && cu <= 0xDBFF; } -function isTrail(cu) { return 0xDC00 <= cu && cu <= 0xDFFF; } -function unsurrogate(h, l) { - return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000; -} diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 7176282..49950fc 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -1,7 +1,7 @@ module CodePoints ( CodePoint() --, Pattern() - --, codePointAt + , codePointAt --, fromCodePointArray --, contains --, indexOf @@ -22,11 +22,17 @@ module CodePoints --, count --, split --, splitAt - --, toCodePointArray + , toCodePointArray ) where -import Prelude ((&&), (<=), (*), (+), (-)) +import Prelude ((&&), (*), (+), (-), (<$>), (<=)) import Data.Maybe (Maybe(Just, Nothing)) +import Data.String (toCharArray) +import Data.Unfoldable (unfoldr) +import Data.List (List(Cons, Nil), fromFoldable) +import Data.Tuple (Tuple(Tuple)) +import Data.Array (index) +import Data.Char (toCharCode) newtype CodePoint = CodePoint Int @@ -40,9 +46,11 @@ codePointToInt (CodePoint n) = n codePointFromSurrogatePair :: Int -> Int -> Maybe CodePoint codePointFromSurrogatePair lead trail | isLead lead && isTrail trail = Just (CodePoint (unsurrogate lead trail)) - where unsurrogate h l = (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000 codePointFromSurrogatePair _ _ = Nothing +unsurrogate :: Int -> Int -> Int +unsurrogate h l = (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000 + isLead :: Int -> Boolean isLead cu = 0xD800 <= cu && cu <= 0xDBFF @@ -50,7 +58,7 @@ isTrail :: Int -> Boolean isTrail cu = 0xDC00 <= cu && cu <= 0xDFFF codePointAt :: Int -> String -> Maybe CodePoint -codePointAt = _codePointAt (Just . CodePoint) Nothing +codePointAt = _codePointAt codePointAtFallback Just Nothing foreign import _codePointAt :: (Int -> String -> Maybe CodePoint) @@ -61,6 +69,21 @@ foreign import _codePointAt -> Maybe CodePoint codePointAtFallback :: Int -> String -> Maybe CodePoint -codePointAtFallback n s = CodePoint <$> index (toCodePointArray s) n +codePointAtFallback n s = index (toCodePointArray s) n + +toCodePointArray :: String -> Array CodePoint +toCodePointArray = _toCodePointArray toCodePointArrayFallback + +foreign import _toCodePointArray + :: (String -> Array CodePoint) + -> String + -> Array CodePoint -foreign import _toCodePointArray :: String -> Array CodePoint +toCodePointArrayFallback :: String -> Array CodePoint +toCodePointArrayFallback s = unfoldr decode (fromFoldable (toCharCode <$> toCharArray s)) + where + decode :: List Int -> Maybe (Tuple CodePoint (List Int)) + decode (Cons h (Cons l rest)) | isLead h && isTrail l + = Just (Tuple (CodePoint (unsurrogate h l)) rest) + decode (Cons c rest) = Just (Tuple (CodePoint c) rest) + decode Nil = Nothing From 25572de0630cc5e43c95dee1f242ea824911a2bc Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Thu, 25 May 2017 00:28:24 -0600 Subject: [PATCH 03/48] minor stuff --- src/Data/String/CodePoints.js | 1 + src/Data/String/CodePoints.purs | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 16d499d..eeb3d2f 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -21,6 +21,7 @@ exports._codePointAt = function (fallback) { if (cps.length <= index) return Nothing; return Just(cps[index]); } else if (hasStringIterator) { + var i = index; var iter = str[Symbol.iterator](); for (;;) { var o = iter.next(); diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 49950fc..7ceccda 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -2,27 +2,29 @@ module CodePoints ( CodePoint() --, Pattern() , codePointAt - --, fromCodePointArray + , codePointFromInt + , codePointToInt --, contains + --, count + --, drop + --, dropWhile --, indexOf --, indexOf' --, lastIndexOf --, lastIndexOf' - --, uncons --, length - --, singleton --, replace --, replaceAll - --, take - --, takeWhile - --, drop - --, dropWhile - --, stripPrefix - --, stripSuffix - --, count + --, singleton --, split --, splitAt + --, stripPrefix + --, stripSuffix + --, take + --, takeWhile + --, uncons , toCodePointArray + --, fromCodePointArray ) where import Prelude ((&&), (*), (+), (-), (<$>), (<=)) @@ -57,6 +59,7 @@ isLead cu = 0xD800 <= cu && cu <= 0xDBFF isTrail :: Int -> Boolean isTrail cu = 0xDC00 <= cu && cu <= 0xDFFF + codePointAt :: Int -> String -> Maybe CodePoint codePointAt = _codePointAt codePointAtFallback Just Nothing @@ -71,6 +74,7 @@ foreign import _codePointAt codePointAtFallback :: Int -> String -> Maybe CodePoint codePointAtFallback n s = index (toCodePointArray s) n + toCodePointArray :: String -> Array CodePoint toCodePointArray = _toCodePointArray toCodePointArrayFallback From 8279da8f9d13f9a6b66514e62f570403380e156a Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Thu, 25 May 2017 15:50:17 -0600 Subject: [PATCH 04/48] count --- src/Data/String/CodePoints.purs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 7ceccda..aece075 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -5,7 +5,7 @@ module CodePoints , codePointFromInt , codePointToInt --, contains - --, count + , count --, drop --, dropWhile --, indexOf @@ -27,13 +27,13 @@ module CodePoints --, fromCodePointArray ) where -import Prelude ((&&), (*), (+), (-), (<$>), (<=)) +import Prelude ((&&), (*), (+), (-), (<$>), (<=), (<<<)) import Data.Maybe (Maybe(Just, Nothing)) import Data.String (toCharArray) import Data.Unfoldable (unfoldr) import Data.List (List(Cons, Nil), fromFoldable) import Data.Tuple (Tuple(Tuple)) -import Data.Array (index) +import Data.Array (index, length, filter) import Data.Char (toCharCode) newtype CodePoint = CodePoint Int @@ -75,6 +75,10 @@ codePointAtFallback :: Int -> String -> Maybe CodePoint codePointAtFallback n s = index (toCodePointArray s) n +count :: (CodePoint -> Boolean) -> String -> Int +count pred = length <<< filter pred <<< toCodePointArray + + toCodePointArray :: String -> Array CodePoint toCodePointArray = _toCodePointArray toCodePointArrayFallback From 292e0de444253949d163e9a96bbaaa673723752e Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Thu, 25 May 2017 16:19:03 -0600 Subject: [PATCH 05/48] drop and take --- src/Data/String/CodePoints.js | 36 +++++++++++++++++++++++++++++++++ src/Data/String/CodePoints.purs | 26 +++++++++++++++++++----- 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index eeb3d2f..2c87a56 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -4,6 +4,7 @@ var hasStringIterator = Symbol != null && typeof Symbol.iterator !== 'undefined' && typeof String.prototype[Symbol.iterator] === 'function'; +var hasFromCodePoint = typeof String.prototype.fromCodePoint === 'function'; exports._codePointAt = function (fallback) { return function (Just) { @@ -37,6 +38,26 @@ exports._codePointAt = function (fallback) { }; }; +exports._take = function (fallback) { + return function (n) { + return function (str) { + if (hasArrayFrom) { + return Array.from(str); + } else if (hasStringIterator) { + var accum = ""; + var iter = str[Symbol.iterator](); + for (var i = 0; i < n; ++i) { + var o = iter.next(); + if (o.done) return accum; + accum += o.value; + } + return accum; + } + return fallback(str); + }; + }; +}; + exports._toCodePointArray = function (fallback) { return function (str) { if (hasArrayFrom) { @@ -53,3 +74,18 @@ exports._toCodePointArray = function (fallback) { return fallback(str); }; }; + + +exports.fromCodePointArray = function (cps) { + if (hasFromCodePoint) { + return String.fromCodePoint.apply(cps); + } + return cps.map(fromCodePoint).join(''); +}; + +function fromCodePoint(cp) { + if (cp <= 0xFFFF) return String.fromCharCode(cp); + var cu1 = String.fromCharCode(Math.floor((cp - 0x10000) / 0x400) + 0xD800); + var cu2 = String.fromCharCode((cp - 0x10000) % 0x400 + 0xDC00); + return cu1 + cu2; +} diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index aece075..eb52f96 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -6,7 +6,7 @@ module CodePoints , codePointToInt --, contains , count - --, drop + , drop --, dropWhile --, indexOf --, indexOf' @@ -20,7 +20,7 @@ module CodePoints --, splitAt --, stripPrefix --, stripSuffix - --, take + , take --, takeWhile --, uncons , toCodePointArray @@ -33,7 +33,7 @@ import Data.String (toCharArray) import Data.Unfoldable (unfoldr) import Data.List (List(Cons, Nil), fromFoldable) import Data.Tuple (Tuple(Tuple)) -import Data.Array (index, length, filter) +import Data.Array as Array import Data.Char (toCharCode) newtype CodePoint = CodePoint Int @@ -72,11 +72,24 @@ foreign import _codePointAt -> Maybe CodePoint codePointAtFallback :: Int -> String -> Maybe CodePoint -codePointAtFallback n s = index (toCodePointArray s) n +codePointAtFallback n s = Array.index (toCodePointArray s) n count :: (CodePoint -> Boolean) -> String -> Int -count pred = length <<< filter pred <<< toCodePointArray +count pred = Array.length <<< Array.filter pred <<< toCodePointArray + + +drop :: Int -> String -> String +drop n s = fromCodePointArray (Array.drop n (toCodePointArray s)) + + +take :: Int -> String -> String +take = _take takeFallback + +foreign import _take :: (Int -> String -> String) -> Int -> String -> String + +takeFallback :: Int -> String -> String +takeFallback n s = fromCodePointArray (Array.take n (toCodePointArray s)) toCodePointArray :: String -> Array CodePoint @@ -95,3 +108,6 @@ toCodePointArrayFallback s = unfoldr decode (fromFoldable (toCharCode <$> toChar = Just (Tuple (CodePoint (unsurrogate h l)) rest) decode (Cons c rest) = Just (Tuple (CodePoint c) rest) decode Nil = Nothing + + +foreign import fromCodePointArray :: Array CodePoint -> String From fd91b0ba09f88318611d503cd89eef6de947c862 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Thu, 25 May 2017 17:00:15 -0600 Subject: [PATCH 06/48] length --- src/Data/String/CodePoints.purs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index eb52f96..dd719a7 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -12,7 +12,7 @@ module CodePoints --, indexOf' --, lastIndexOf --, lastIndexOf' - --, length + , length --, replace --, replaceAll --, singleton @@ -83,6 +83,10 @@ drop :: Int -> String -> String drop n s = fromCodePointArray (Array.drop n (toCodePointArray s)) +length :: String -> Int +length = Array.length <<< toCodePointArray + + take :: Int -> String -> String take = _take takeFallback From 83876413a8dd5b907c20f51944838a0d1602d03d Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Thu, 25 May 2017 17:02:19 -0600 Subject: [PATCH 07/48] singleton --- src/Data/String/CodePoints.js | 2 ++ src/Data/String/CodePoints.purs | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 2c87a56..1d3469a 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -38,6 +38,8 @@ exports._codePointAt = function (fallback) { }; }; +exports.singleton = fromCodePoint; + exports._take = function (fallback) { return function (n) { return function (str) { diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index dd719a7..4f20f8f 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -15,7 +15,7 @@ module CodePoints , length --, replace --, replaceAll - --, singleton + , singleton --, split --, splitAt --, stripPrefix @@ -87,6 +87,9 @@ length :: String -> Int length = Array.length <<< toCodePointArray +foreign import singleton :: CodePoint -> String + + take :: Int -> String -> String take = _take takeFallback From fb473871d8f7620bedda52e6802f86aa5bf03bb8 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 26 May 2017 10:04:05 -0600 Subject: [PATCH 08/48] splitAt --- src/Data/String/CodePoints.purs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 4f20f8f..def5b99 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -17,7 +17,7 @@ module CodePoints --, replaceAll , singleton --, split - --, splitAt + , splitAt --, stripPrefix --, stripSuffix , take @@ -27,7 +27,7 @@ module CodePoints --, fromCodePointArray ) where -import Prelude ((&&), (*), (+), (-), (<$>), (<=), (<<<)) +import Prelude ((&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<)) import Data.Maybe (Maybe(Just, Nothing)) import Data.String (toCharArray) import Data.Unfoldable (unfoldr) @@ -90,6 +90,17 @@ length = Array.length <<< toCodePointArray foreign import singleton :: CodePoint -> String +splitAt :: Int -> String -> Maybe { before :: String, after :: String } +splitAt i s = + let cps = toCodePointArray s in + if i < 0 || Array.length cps <= i + then Nothing + else Just { + before: fromCodePointArray (Array.take i cps), + after: fromCodePointArray (Array.drop i cps) + } + + take :: Int -> String -> String take = _take takeFallback From 5a6cfd069df05db7f452fec062590b0155292143 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 26 May 2017 10:16:10 -0600 Subject: [PATCH 09/48] use String.fromCodePoint in singleton implementation when available --- src/Data/String/CodePoints.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 1d3469a..4c0cc48 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -38,7 +38,7 @@ exports._codePointAt = function (fallback) { }; }; -exports.singleton = fromCodePoint; +exports.singleton = hasFromCodePoint ? String.fromCodePoint : fromCodePoint; exports._take = function (fallback) { return function (n) { @@ -80,7 +80,7 @@ exports._toCodePointArray = function (fallback) { exports.fromCodePointArray = function (cps) { if (hasFromCodePoint) { - return String.fromCodePoint.apply(cps); + return String.fromCodePoint.apply(String, cps); } return cps.map(fromCodePoint).join(''); }; From 3003c090d6fcc53e90222bca61f600569cac916e Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 26 May 2017 14:44:05 -0600 Subject: [PATCH 10/48] re-export Data.String --- src/Data/String/CodePoints.purs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index def5b99..2c6887b 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -1,40 +1,36 @@ module CodePoints ( CodePoint() - --, Pattern() , codePointAt , codePointFromInt , codePointToInt - --, contains , count , drop --, dropWhile + , fromCodePointArray --, indexOf --, indexOf' --, lastIndexOf --, lastIndexOf' , length - --, replace - --, replaceAll , singleton - --, split , splitAt - --, stripPrefix - --, stripSuffix , take --, takeWhile --, uncons , toCodePointArray - --, fromCodePointArray + + , module StringReExports ) where -import Prelude ((&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<)) +import Data.Array as Array +import Data.Char (toCharCode) +import Data.List (List(Cons, Nil), fromFoldable) import Data.Maybe (Maybe(Just, Nothing)) import Data.String (toCharArray) -import Data.Unfoldable (unfoldr) -import Data.List (List(Cons, Nil), fromFoldable) +import Data.String hiding (count, drop, dropWhile, indexOf, indexOf', lastIndexOf, lastIndexOf', length, singleton, splitAt, take, takeWhile, uncons) as StringReExports import Data.Tuple (Tuple(Tuple)) -import Data.Array as Array -import Data.Char (toCharCode) +import Data.Unfoldable (unfoldr) +import Prelude ((&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<)) newtype CodePoint = CodePoint Int From 75117d2043a00b9bbe03798d1d4c08a37eee6ae2 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 26 May 2017 14:56:12 -0600 Subject: [PATCH 11/48] uncons --- src/Data/String/CodePoints.purs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 2c6887b..a8aa8fc 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -16,7 +16,7 @@ module CodePoints , splitAt , take --, takeWhile - --, uncons + , uncons , toCodePointArray , module StringReExports @@ -79,6 +79,9 @@ drop :: Int -> String -> String drop n s = fromCodePointArray (Array.drop n (toCodePointArray s)) +foreign import fromCodePointArray :: Array CodePoint -> String + + length :: String -> Int length = Array.length <<< toCodePointArray @@ -124,4 +127,5 @@ toCodePointArrayFallback s = unfoldr decode (fromFoldable (toCharCode <$> toChar decode Nil = Nothing -foreign import fromCodePointArray :: Array CodePoint -> String +uncons :: String -> Maybe { head :: CodePoint, tail :: String } +uncons s = { head: _, tail: drop 1 s } <$> codePointAt 0 s From ecfbf0bc6825b6af3492baa07cc3acedf69857c8 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 26 May 2017 14:57:24 -0600 Subject: [PATCH 12/48] re-arrange imports --- src/Data/String/CodePoints.purs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index a8aa8fc..3a6d180 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -1,5 +1,6 @@ module CodePoints - ( CodePoint() + ( module StringReExports + , CodePoint() , codePointAt , codePointFromInt , codePointToInt @@ -16,10 +17,8 @@ module CodePoints , splitAt , take --, takeWhile - , uncons , toCodePointArray - - , module StringReExports + , uncons ) where import Data.Array as Array From d5b6d92b5e140cc8843e508caf6db015d18fb15c Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 26 May 2017 15:00:01 -0600 Subject: [PATCH 13/48] re-arrange JS exports --- src/Data/String/CodePoints.js | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 4c0cc48..0851961 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -38,6 +38,13 @@ exports._codePointAt = function (fallback) { }; }; +exports.fromCodePointArray = function (cps) { + if (hasFromCodePoint) { + return String.fromCodePoint.apply(String, cps); + } + return cps.map(fromCodePoint).join(''); +}; + exports.singleton = hasFromCodePoint ? String.fromCodePoint : fromCodePoint; exports._take = function (fallback) { @@ -77,14 +84,6 @@ exports._toCodePointArray = function (fallback) { }; }; - -exports.fromCodePointArray = function (cps) { - if (hasFromCodePoint) { - return String.fromCodePoint.apply(String, cps); - } - return cps.map(fromCodePoint).join(''); -}; - function fromCodePoint(cp) { if (cp <= 0xFFFF) return String.fromCharCode(cp); var cu1 = String.fromCharCode(Math.floor((cp - 0x10000) / 0x400) + 0xD800); From 8860295f284b635cc2b1cb5b8407d76c8d617f4d Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 26 May 2017 17:38:55 -0600 Subject: [PATCH 14/48] fix count; implement dropWhile and takeWhile --- src/Data/String/CodePoints.js | 24 ++++++++++++++++++++++++ src/Data/String/CodePoints.purs | 31 ++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 0851961..df9d144 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -38,6 +38,30 @@ exports._codePointAt = function (fallback) { }; }; +exports._count = function (isLead) { + return function (isTrail) { + return function (unsurrogate) { + return function (pred) { + return function (str) { + for (var cuCount = 0, cpCount = 0; cuCount < str.length; ++cuCount, ++cpCount) { + var lead = str.charCodeAt(cuCount); + var cp = lead; + if (isLead(lead) && cuCount + 1 < str.length) { + var trail = str.charCodeAt(cuCount + 1); + if (isTrail(trail)) { + cp = unsurrogate(lead, trail); + ++cuCount; + } + } + if (!pred(cp)) return cpCount; + } + return str.length; + }; + }; + }; + }; +}; + exports.fromCodePointArray = function (cps) { if (hasFromCodePoint) { return String.fromCodePoint.apply(String, cps); diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 3a6d180..0528796 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -6,7 +6,7 @@ module CodePoints , codePointToInt , count , drop - --, dropWhile + , dropWhile , fromCodePointArray --, indexOf --, indexOf' @@ -16,7 +16,7 @@ module CodePoints , singleton , splitAt , take - --, takeWhile + , takeWhile , toCodePointArray , uncons ) where @@ -31,6 +31,7 @@ import Data.Tuple (Tuple(Tuple)) import Data.Unfoldable (unfoldr) import Prelude ((&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<)) + newtype CodePoint = CodePoint Int codePointFromInt :: Int -> Maybe CodePoint @@ -42,11 +43,11 @@ codePointToInt (CodePoint n) = n codePointFromSurrogatePair :: Int -> Int -> Maybe CodePoint codePointFromSurrogatePair lead trail | isLead lead && isTrail trail - = Just (CodePoint (unsurrogate lead trail)) + = Just (unsurrogate lead trail) codePointFromSurrogatePair _ _ = Nothing -unsurrogate :: Int -> Int -> Int -unsurrogate h l = (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000 +unsurrogate :: Int -> Int -> CodePoint +unsurrogate h l = CodePoint ((h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000) isLead :: Int -> Boolean isLead cu = 0xD800 <= cu && cu <= 0xDBFF @@ -71,13 +72,25 @@ codePointAtFallback n s = Array.index (toCodePointArray s) n count :: (CodePoint -> Boolean) -> String -> Int -count pred = Array.length <<< Array.filter pred <<< toCodePointArray +count = _count isLead isTrail unsurrogate + +foreign import _count + :: (Int -> Boolean) + -> (Int -> Boolean) + -> (Int -> Int -> CodePoint) + -> (CodePoint -> Boolean) + -> String + -> Int drop :: Int -> String -> String drop n s = fromCodePointArray (Array.drop n (toCodePointArray s)) +dropWhile :: (CodePoint -> Boolean) -> String -> String +dropWhile p s = drop (count p s) s + + foreign import fromCodePointArray :: Array CodePoint -> String @@ -108,6 +121,10 @@ takeFallback :: Int -> String -> String takeFallback n s = fromCodePointArray (Array.take n (toCodePointArray s)) +takeWhile :: (CodePoint -> Boolean) -> String -> String +takeWhile p s = take (count p s) s + + toCodePointArray :: String -> Array CodePoint toCodePointArray = _toCodePointArray toCodePointArrayFallback @@ -121,7 +138,7 @@ toCodePointArrayFallback s = unfoldr decode (fromFoldable (toCharCode <$> toChar where decode :: List Int -> Maybe (Tuple CodePoint (List Int)) decode (Cons h (Cons l rest)) | isLead h && isTrail l - = Just (Tuple (CodePoint (unsurrogate h l)) rest) + = Just (Tuple (unsurrogate h l) rest) decode (Cons c rest) = Just (Tuple (CodePoint c) rest) decode Nil = Nothing From a6855b4fc770eb3afb6030ef4be7ce9964349208 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 26 May 2017 18:44:31 -0600 Subject: [PATCH 15/48] indexOf and lastIndexOf --- src/Data/String/CodePoints.purs | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 0528796..68427b3 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -8,10 +8,10 @@ module CodePoints , drop , dropWhile , fromCodePointArray - --, indexOf - --, indexOf' - --, lastIndexOf - --, lastIndexOf' + , indexOf + , indexOf' + , lastIndexOf + , lastIndexOf' , length , singleton , splitAt @@ -25,7 +25,7 @@ import Data.Array as Array import Data.Char (toCharCode) import Data.List (List(Cons, Nil), fromFoldable) import Data.Maybe (Maybe(Just, Nothing)) -import Data.String (toCharArray) +import Data.String as String import Data.String hiding (count, drop, dropWhile, indexOf, indexOf', lastIndexOf, lastIndexOf', length, singleton, splitAt, take, takeWhile, uncons) as StringReExports import Data.Tuple (Tuple(Tuple)) import Data.Unfoldable (unfoldr) @@ -94,6 +94,22 @@ dropWhile p s = drop (count p s) s foreign import fromCodePointArray :: Array CodePoint -> String +indexOf :: String.Pattern -> String -> Maybe Int +indexOf p s = (\i -> length (String.take i s)) <$> String.indexOf p s + + +indexOf' :: String.Pattern -> Int -> String -> Maybe Int +indexOf' p i s = (\k -> length (String.take k s)) <$> String.indexOf' p i s + + +lastIndexOf :: String.Pattern -> String -> Maybe Int +lastIndexOf p s = (\i -> length (String.take i s)) <$> String.lastIndexOf p s + + +lastIndexOf' :: String.Pattern -> Int -> String -> Maybe Int +lastIndexOf' p i s = (\k -> length (String.take k s)) <$> String.lastIndexOf' p i s + + length :: String -> Int length = Array.length <<< toCodePointArray @@ -134,7 +150,7 @@ foreign import _toCodePointArray -> Array CodePoint toCodePointArrayFallback :: String -> Array CodePoint -toCodePointArrayFallback s = unfoldr decode (fromFoldable (toCharCode <$> toCharArray s)) +toCodePointArrayFallback s = unfoldr decode (fromFoldable (toCharCode <$> String.toCharArray s)) where decode :: List Int -> Maybe (Tuple CodePoint (List Int)) decode (Cons h (Cons l rest)) | isLead h && isTrail l From 8c55257a3f12632f12e8c7027691225896571136 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Sat, 27 May 2017 00:17:53 -0600 Subject: [PATCH 16/48] add some initial tests and fix some bugs --- src/Data/String/CodePoints.js | 70 ++++++------- src/Data/String/CodePoints.purs | 9 +- test/Test/Data/String/CodePoints.purs | 143 ++++++++++++++++++++++++++ test/Test/Main.purs | 2 + 4 files changed, 186 insertions(+), 38 deletions(-) create mode 100644 test/Test/Data/String/CodePoints.purs diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index df9d144..fd75ab5 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -5,30 +5,25 @@ var hasStringIterator = typeof Symbol.iterator !== 'undefined' && typeof String.prototype[Symbol.iterator] === 'function'; var hasFromCodePoint = typeof String.prototype.fromCodePoint === 'function'; +var hasCodePointAt = typeof String.prototype.codePointAt === 'function'; exports._codePointAt = function (fallback) { return function (Just) { return function (Nothing) { - return function (relIndex) { + return function (index) { return function (str) { var length = str.length; - if (length <= relIndex) return Nothing; - var index = relIndex < 0 ? ((relIndex % length) + length) % length : relIndex; - if (typeof String.prototype.codePointAt === 'function') { - var cp = str.codePointAt(index); - return cp == null ? Nothing : Just(cp); - } else if (hasArrayFrom) { + if (index < 0 || index >= length) return Nothing; + if (hasArrayFrom && hasCodePointAt) { var cps = Array.from(str); - if (cps.length <= index) return Nothing; - return Just(cps[index]); + if (index >= cps.length) return Nothing; + return Just(cps[index].codePointAt(0)); } else if (hasStringIterator) { - var i = index; var iter = str[Symbol.iterator](); - for (;;) { + for (var i = index;; --i) { var o = iter.next(); if (o.done) return Nothing; if (i == 0) return Just(o.value); - --i; } } return fallback(index)(str); @@ -43,40 +38,41 @@ exports._count = function (isLead) { return function (unsurrogate) { return function (pred) { return function (str) { - for (var cuCount = 0, cpCount = 0; cuCount < str.length; ++cuCount, ++cpCount) { + var cpCount = 0; + for (var cuCount = 0; cuCount < str.length; ++cuCount) { var lead = str.charCodeAt(cuCount); var cp = lead; if (isLead(lead) && cuCount + 1 < str.length) { var trail = str.charCodeAt(cuCount + 1); if (isTrail(trail)) { - cp = unsurrogate(lead, trail); + cp = unsurrogate(lead)(trail); ++cuCount; } } if (!pred(cp)) return cpCount; + ++cpCount; } - return str.length; + return cpCount; }; }; }; }; }; -exports.fromCodePointArray = function (cps) { - if (hasFromCodePoint) { - return String.fromCodePoint.apply(String, cps); - } - return cps.map(fromCodePoint).join(''); -}; +exports.fromCodePointArray = hasFromCodePoint + ? function (cps) { return String.fromCodePoint.apply(String, cps); } + : function (cps) { return cps.map(fromCodePoint).join(''); }; exports.singleton = hasFromCodePoint ? String.fromCodePoint : fromCodePoint; exports._take = function (fallback) { return function (n) { - return function (str) { - if (hasArrayFrom) { - return Array.from(str); - } else if (hasStringIterator) { + if (hasArrayFrom) { + return function (str) { + return Array.from(str).slice(0, n).join(''); + }; + } else if (hasStringIterator) { + return function (str) { var accum = ""; var iter = str[Symbol.iterator](); for (var i = 0; i < n; ++i) { @@ -85,27 +81,29 @@ exports._take = function (fallback) { accum += o.value; } return accum; - } - return fallback(str); - }; + }; + } + return fallback; }; }; exports._toCodePointArray = function (fallback) { - return function (str) { - if (hasArrayFrom) { - return Array.from(str); - } else if (hasStringIterator) { + if (hasArrayFrom && hasCodePointAt) { + return function (str) { + return Array.from(str, function (x) { return x.codePointAt(0); }); + }; + } else if (hasStringIterator && hasCodePointAt) { + return function (str) { var accum = []; var iter = str[Symbol.iterator](); for (;;) { var o = iter.next(); if (o.done) return accum; - accum.push(o.value); + accum.push(o.value.codePointAt(0)); } - } - return fallback(str); - }; + }; + } + return fallback; }; function fromCodePoint(cp) { diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 68427b3..6bdf282 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -1,4 +1,4 @@ -module CodePoints +module Data.String.CodePoints ( module StringReExports , CodePoint() , codePointAt @@ -25,15 +25,20 @@ import Data.Array as Array import Data.Char (toCharCode) import Data.List (List(Cons, Nil), fromFoldable) import Data.Maybe (Maybe(Just, Nothing)) +import Data.Newtype (class Newtype) import Data.String as String import Data.String hiding (count, drop, dropWhile, indexOf, indexOf', lastIndexOf, lastIndexOf', length, singleton, splitAt, take, takeWhile, uncons) as StringReExports import Data.Tuple (Tuple(Tuple)) import Data.Unfoldable (unfoldr) -import Prelude ((&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<)) +import Prelude (class Eq, class Ord, (&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<)) newtype CodePoint = CodePoint Int +derive instance eqCodePoint :: Eq CodePoint +derive instance ordCodePoint :: Ord CodePoint +derive instance newtypeCodePoint :: Newtype CodePoint _ + codePointFromInt :: Int -> Maybe CodePoint codePointFromInt n | 0 <= n && n <= 0x10FFFF = Just (CodePoint n) codePointFromInt n = Nothing diff --git a/test/Test/Data/String/CodePoints.purs b/test/Test/Data/String/CodePoints.purs new file mode 100644 index 0000000..28e289c --- /dev/null +++ b/test/Test/Data/String/CodePoints.purs @@ -0,0 +1,143 @@ +module Test.Data.String.CodePoints (testStringCodePoints) where + +import Prelude (Unit, Ordering(..), (==), ($), discard, negate, not, (/=), (&&), (<)) + +import Control.Monad.Eff (Eff) +import Control.Monad.Eff.Console (CONSOLE, log) + +import Data.Maybe (Maybe(..), isNothing, maybe) +import Data.String.CodePoints + +import Test.Assert (ASSERT, assert) + +str :: String +str = "a\xDC00\xD800\xD800\x16805\x16A06\&z" + +testStringCodePoints :: forall eff. Eff (console :: CONSOLE, assert :: ASSERT | eff) Unit +testStringCodePoints = do + log "codePointAt" + assert $ codePointAt (-1) str == Nothing + assert $ codePointAt 0 str == (codePointFromInt 0x61) + assert $ codePointAt 1 str == (codePointFromInt 0xDC00) + assert $ codePointAt 2 str == (codePointFromInt 0xD800) + assert $ codePointAt 3 str == (codePointFromInt 0xD800) + assert $ codePointAt 4 str == (codePointFromInt 0x16805) + assert $ codePointAt 5 str == (codePointFromInt 0x16A06) + assert $ codePointAt 6 str == (codePointFromInt 0x7A) + assert $ codePointAt 7 str == Nothing + + log "count" + assert $ count (\_ -> true) "" == 0 + assert $ count (\_ -> false) str == 0 + assert $ count (\_ -> true) str == 7 + assert $ count (\x -> codePointToInt x < 0xFFFF) str == 4 + assert $ count (\x -> codePointToInt x < 0xDC00) str == 1 + + log "drop" + assert $ drop (-1) str == str + assert $ drop 0 str == str + assert $ drop 1 str == "\xDC00\xD800\xD800\x16805\x16A06\&z" + assert $ drop 2 str == "\xD800\xD800\x16805\x16A06\&z" + assert $ drop 3 str == "\xD800\x16805\x16A06\&z" + assert $ drop 4 str == "\x16805\x16A06\&z" + assert $ drop 5 str == "\x16A06\&z" + assert $ drop 6 str == "z" + assert $ drop 7 str == "" + assert $ drop 8 str == "" + + log "dropWhile" + assert $ dropWhile (\c -> true) str == "" + assert $ dropWhile (\c -> false) str == str + assert $ dropWhile (\c -> codePointToInt c < 0xFFFF) str == "\x16805\x16A06\&z" + assert $ dropWhile (\c -> codePointToInt c < 0xDC00) str == "\xDC00\xD800\xD800\x16805\x16A06\&z" + + log "indexOf" + assert $ indexOf (Pattern "") "" == Just 0 + assert $ indexOf (Pattern "") str == Just 0 + assert $ indexOf (Pattern "a") str == Just 0 + assert $ indexOf (Pattern "\xDC00\xD800\xD800") str == Just 1 + assert $ indexOf (Pattern "\xD800") str == Just 2 + assert $ indexOf (Pattern "\xD800\xD800") str == Just 2 + assert $ indexOf (Pattern "\xD800\xD81A") str == Just 3 + assert $ indexOf (Pattern "\xD800\x16805") str == Just 3 + assert $ indexOf (Pattern "\x16805") str == Just 4 + assert $ indexOf (Pattern "\x16A06") str == Just 5 + assert $ indexOf (Pattern "z") str == Just 6 + assert $ indexOf (Pattern "\0") str == Nothing + assert $ indexOf (Pattern "\xD81A") str == Just 4 + -- TODO: Should this be Nothing? It matches the trail surrogate of a surrogate pair. + -- It'd be nice if (drop (indexOf pattern str) str) was guaranteed to start with pattern. + -- If we change this, we'll also need to add a matching contains implementation to the CodePoints module. + -- I vote we just delete the test. Passing surrogate halves to the CodePoints functions should not be supported. + assert $ indexOf (Pattern "\xDC05") str == Just 5 + +-- log "singleton" +-- assert $ singleton 'a' == "a" +-- +-- log "takeWhile" +-- assert $ takeWhile (\c -> true) "abc" == "abc" +-- assert $ takeWhile (\c -> false) "abc" == "" +-- assert $ takeWhile (\c -> c /= 'b') "aabbcc" == "aa" +-- +-- log "indexOf'" +-- assert $ indexOf' (Pattern "") 0 "" == Just 0 +-- assert $ indexOf' (Pattern "") (-1) "ab" == Nothing +-- assert $ indexOf' (Pattern "") 0 "ab" == Just 0 +-- assert $ indexOf' (Pattern "") 1 "ab" == Just 1 +-- assert $ indexOf' (Pattern "") 2 "ab" == Just 2 +-- assert $ indexOf' (Pattern "") 3 "ab" == Nothing +-- assert $ indexOf' (Pattern "bc") 0 "abcd" == Just 1 +-- assert $ indexOf' (Pattern "bc") 1 "abcd" == Just 1 +-- assert $ indexOf' (Pattern "bc") 2 "abcd" == Nothing +-- assert $ indexOf' (Pattern "cb") 0 "abcd" == Nothing +-- +-- log "lastIndexOf" +-- assert $ lastIndexOf (Pattern "") "" == Just 0 +-- assert $ lastIndexOf (Pattern "") "abcd" == Just 4 +-- assert $ lastIndexOf (Pattern "bc") "abcd" == Just 1 +-- assert $ lastIndexOf (Pattern "cb") "abcd" == Nothing +-- +-- log "lastIndexOf'" +-- assert $ lastIndexOf' (Pattern "") 0 "" == Just 0 +-- assert $ lastIndexOf' (Pattern "") (-1) "ab" == Nothing +-- assert $ lastIndexOf' (Pattern "") 0 "ab" == Just 0 +-- assert $ lastIndexOf' (Pattern "") 1 "ab" == Just 1 +-- assert $ lastIndexOf' (Pattern "") 2 "ab" == Just 2 +-- assert $ lastIndexOf' (Pattern "") 3 "ab" == Nothing +-- assert $ lastIndexOf' (Pattern "bc") 0 "abcd" == Nothing +-- assert $ lastIndexOf' (Pattern "bc") 1 "abcd" == Just 1 +-- assert $ lastIndexOf' (Pattern "bc") 2 "abcd" == Just 1 +-- assert $ lastIndexOf' (Pattern "cb") 0 "abcd" == Nothing +-- +-- log "length" +-- assert $ length "" == 0 +-- assert $ length "a" == 1 +-- assert $ length "ab" == 2 +-- +-- log "take" +-- assert $ take 0 "ab" == "" +-- assert $ take 1 "ab" == "a" +-- assert $ take 2 "ab" == "ab" +-- assert $ take 3 "ab" == "ab" +-- assert $ take (-1) "ab" == "" +-- +-- log "count" +-- assert $ count (_ == 'a') "" == 0 +-- assert $ count (_ == 'a') "ab" == 1 +-- assert $ count (_ == 'a') "aaab" == 3 +-- assert $ count (_ == 'a') "abaa" == 1 +-- +-- log "splitAt" +-- let testSplitAt i str res = +-- assert $ case splitAt i str of +-- Nothing -> +-- isNothing res +-- Just { before, after } -> +-- maybe false (\r -> +-- r.before == before && r.after == after) res +-- +-- testSplitAt 1 "" Nothing +-- testSplitAt 0 "a" $ Just {before: "", after: "a"} +-- testSplitAt 1 "ab" $ Just {before: "a", after: "b"} +-- testSplitAt 3 "aabcc" $ Just {before: "aab", after: "cc"} +-- testSplitAt (-1) "abc" $ Nothing diff --git a/test/Test/Main.purs b/test/Test/Main.purs index b2c7f50..8260cc6 100644 --- a/test/Test/Main.purs +++ b/test/Test/Main.purs @@ -8,6 +8,7 @@ import Control.Monad.Eff.Console (CONSOLE) import Test.Assert (ASSERT) import Test.Data.Char (testChar) import Test.Data.String (testString) +import Test.Data.String.CodePoints (testStringCodePoints) import Test.Data.String.Regex (testStringRegex) import Test.Data.String.Unsafe (testStringUnsafe) import Test.Data.String.CaseInsensitive (testCaseInsensitiveString) @@ -16,6 +17,7 @@ main :: Eff (console :: CONSOLE, assert :: ASSERT) Unit main = do testChar testString + testStringCodePoints testStringUnsafe testStringRegex testCaseInsensitiveString From a26afdf1cf62120f6b3459d5feacc22d6ff93f8b Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Sat, 27 May 2017 00:32:42 -0600 Subject: [PATCH 17/48] trailing whitespace --- src/Data/String/CodePoints.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index fd75ab5..d453de6 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -1,5 +1,5 @@ var hasArrayFrom = typeof Array.from === 'function'; -var hasStringIterator = +var hasStringIterator = typeof Symbol !== 'undefined' && Symbol != null && typeof Symbol.iterator !== 'undefined' && From c1ff8c579316c6a34c605a5d702811597ea1cfcd Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Sat, 27 May 2017 16:11:14 -0600 Subject: [PATCH 18/48] finished the tests --- src/Data/String/CodePoints.js | 2 +- src/Data/String/CodePoints.purs | 10 +- test/Test/Data/String/CodePoints.purs | 189 ++++++++++++++++---------- 3 files changed, 126 insertions(+), 75 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index d453de6..c1d9163 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -69,7 +69,7 @@ exports._take = function (fallback) { return function (n) { if (hasArrayFrom) { return function (str) { - return Array.from(str).slice(0, n).join(''); + return Array.from(str).slice(0, Math.max(0, n)).join(''); }; } else if (hasStringIterator) { return function (str) { diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 6bdf282..f6b49ed 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -104,7 +104,9 @@ indexOf p s = (\i -> length (String.take i s)) <$> String.indexOf p s indexOf' :: String.Pattern -> Int -> String -> Maybe Int -indexOf' p i s = (\k -> length (String.take k s)) <$> String.indexOf' p i s +indexOf' p i s = + let s' = drop i s in + (\k -> i + length (String.take k s')) <$> String.indexOf p s' lastIndexOf :: String.Pattern -> String -> Maybe Int @@ -112,7 +114,9 @@ lastIndexOf p s = (\i -> length (String.take i s)) <$> String.lastIndexOf p s lastIndexOf' :: String.Pattern -> Int -> String -> Maybe Int -lastIndexOf' p i s = (\k -> length (String.take k s)) <$> String.lastIndexOf' p i s +lastIndexOf' p i s = + let s' = drop i s in + (\k -> i + length (String.take k s')) <$> String.lastIndexOf p s' length :: String -> Int @@ -125,7 +129,7 @@ foreign import singleton :: CodePoint -> String splitAt :: Int -> String -> Maybe { before :: String, after :: String } splitAt i s = let cps = toCodePointArray s in - if i < 0 || Array.length cps <= i + if i < 0 || Array.length cps < i then Nothing else Just { before: fromCodePointArray (Array.take i cps), diff --git a/test/Test/Data/String/CodePoints.purs b/test/Test/Data/String/CodePoints.purs index 28e289c..d7dac6a 100644 --- a/test/Test/Data/String/CodePoints.purs +++ b/test/Test/Data/String/CodePoints.purs @@ -1,6 +1,6 @@ module Test.Data.String.CodePoints (testStringCodePoints) where -import Prelude (Unit, Ordering(..), (==), ($), discard, negate, not, (/=), (&&), (<)) +import Prelude (Unit, discard, negate, (==), ($), (&&), (<), (<$>)) import Control.Monad.Eff (Eff) import Control.Monad.Eff.Console (CONSOLE, log) @@ -54,6 +54,7 @@ testStringCodePoints = do log "indexOf" assert $ indexOf (Pattern "") "" == Just 0 assert $ indexOf (Pattern "") str == Just 0 + assert $ indexOf (Pattern str) str == Just 0 assert $ indexOf (Pattern "a") str == Just 0 assert $ indexOf (Pattern "\xDC00\xD800\xD800") str == Just 1 assert $ indexOf (Pattern "\xD800") str == Just 2 @@ -71,73 +72,119 @@ testStringCodePoints = do -- I vote we just delete the test. Passing surrogate halves to the CodePoints functions should not be supported. assert $ indexOf (Pattern "\xDC05") str == Just 5 --- log "singleton" --- assert $ singleton 'a' == "a" --- --- log "takeWhile" --- assert $ takeWhile (\c -> true) "abc" == "abc" --- assert $ takeWhile (\c -> false) "abc" == "" --- assert $ takeWhile (\c -> c /= 'b') "aabbcc" == "aa" --- --- log "indexOf'" --- assert $ indexOf' (Pattern "") 0 "" == Just 0 --- assert $ indexOf' (Pattern "") (-1) "ab" == Nothing --- assert $ indexOf' (Pattern "") 0 "ab" == Just 0 --- assert $ indexOf' (Pattern "") 1 "ab" == Just 1 --- assert $ indexOf' (Pattern "") 2 "ab" == Just 2 --- assert $ indexOf' (Pattern "") 3 "ab" == Nothing --- assert $ indexOf' (Pattern "bc") 0 "abcd" == Just 1 --- assert $ indexOf' (Pattern "bc") 1 "abcd" == Just 1 --- assert $ indexOf' (Pattern "bc") 2 "abcd" == Nothing --- assert $ indexOf' (Pattern "cb") 0 "abcd" == Nothing --- --- log "lastIndexOf" --- assert $ lastIndexOf (Pattern "") "" == Just 0 --- assert $ lastIndexOf (Pattern "") "abcd" == Just 4 --- assert $ lastIndexOf (Pattern "bc") "abcd" == Just 1 --- assert $ lastIndexOf (Pattern "cb") "abcd" == Nothing --- --- log "lastIndexOf'" --- assert $ lastIndexOf' (Pattern "") 0 "" == Just 0 --- assert $ lastIndexOf' (Pattern "") (-1) "ab" == Nothing --- assert $ lastIndexOf' (Pattern "") 0 "ab" == Just 0 --- assert $ lastIndexOf' (Pattern "") 1 "ab" == Just 1 --- assert $ lastIndexOf' (Pattern "") 2 "ab" == Just 2 --- assert $ lastIndexOf' (Pattern "") 3 "ab" == Nothing --- assert $ lastIndexOf' (Pattern "bc") 0 "abcd" == Nothing --- assert $ lastIndexOf' (Pattern "bc") 1 "abcd" == Just 1 --- assert $ lastIndexOf' (Pattern "bc") 2 "abcd" == Just 1 --- assert $ lastIndexOf' (Pattern "cb") 0 "abcd" == Nothing --- --- log "length" --- assert $ length "" == 0 --- assert $ length "a" == 1 --- assert $ length "ab" == 2 --- --- log "take" --- assert $ take 0 "ab" == "" --- assert $ take 1 "ab" == "a" --- assert $ take 2 "ab" == "ab" --- assert $ take 3 "ab" == "ab" --- assert $ take (-1) "ab" == "" --- --- log "count" --- assert $ count (_ == 'a') "" == 0 --- assert $ count (_ == 'a') "ab" == 1 --- assert $ count (_ == 'a') "aaab" == 3 --- assert $ count (_ == 'a') "abaa" == 1 --- --- log "splitAt" --- let testSplitAt i str res = --- assert $ case splitAt i str of --- Nothing -> --- isNothing res --- Just { before, after } -> --- maybe false (\r -> --- r.before == before && r.after == after) res --- --- testSplitAt 1 "" Nothing --- testSplitAt 0 "a" $ Just {before: "", after: "a"} --- testSplitAt 1 "ab" $ Just {before: "a", after: "b"} --- testSplitAt 3 "aabcc" $ Just {before: "aab", after: "cc"} --- testSplitAt (-1) "abc" $ Nothing + log "indexOf'" + assert $ indexOf' (Pattern "") 0 "" == Just 0 + assert $ indexOf' (Pattern str) 0 str == Just 0 + assert $ indexOf' (Pattern str) 1 str == Nothing + assert $ indexOf' (Pattern "a") 0 str == Just 0 + assert $ indexOf' (Pattern "a") 1 str == Nothing + assert $ indexOf' (Pattern "z") 0 str == Just 6 + assert $ indexOf' (Pattern "z") 1 str == Just 6 + assert $ indexOf' (Pattern "z") 2 str == Just 6 + assert $ indexOf' (Pattern "z") 3 str == Just 6 + assert $ indexOf' (Pattern "z") 4 str == Just 6 + assert $ indexOf' (Pattern "z") 5 str == Just 6 + assert $ indexOf' (Pattern "z") 6 str == Just 6 + assert $ indexOf' (Pattern "z") 7 str == Nothing + + log "lastIndexOf" + assert $ lastIndexOf (Pattern "") "" == Just 0 + assert $ lastIndexOf (Pattern "") str == Just 7 + assert $ lastIndexOf (Pattern str) str == Just 0 + assert $ lastIndexOf (Pattern "a") str == Just 0 + assert $ lastIndexOf (Pattern "\xDC00\xD800\xD800") str == Just 1 + assert $ lastIndexOf (Pattern "\xD800") str == Just 3 + assert $ lastIndexOf (Pattern "\xD800\xD800") str == Just 2 + assert $ lastIndexOf (Pattern "\xD800\xD81A") str == Just 3 + assert $ lastIndexOf (Pattern "\xD800\x16805") str == Just 3 + assert $ lastIndexOf (Pattern "\x16805") str == Just 4 + assert $ lastIndexOf (Pattern "\x16A06") str == Just 5 + assert $ lastIndexOf (Pattern "z") str == Just 6 + assert $ lastIndexOf (Pattern "\0") str == Nothing + assert $ lastIndexOf (Pattern "\xD81A") str == Just 5 + + log "lastIndexOf'" + assert $ lastIndexOf' (Pattern "") 0 "" == Just 0 + assert $ lastIndexOf' (Pattern str) 0 str == Just 0 + assert $ lastIndexOf' (Pattern str) 1 str == Nothing + assert $ lastIndexOf' (Pattern "a") 0 str == Just 0 + assert $ lastIndexOf' (Pattern "a") 1 str == Nothing + assert $ lastIndexOf' (Pattern "z") 0 str == Just 6 + assert $ lastIndexOf' (Pattern "z") 1 str == Just 6 + assert $ lastIndexOf' (Pattern "z") 2 str == Just 6 + assert $ lastIndexOf' (Pattern "z") 3 str == Just 6 + assert $ lastIndexOf' (Pattern "z") 4 str == Just 6 + assert $ lastIndexOf' (Pattern "z") 5 str == Just 6 + assert $ lastIndexOf' (Pattern "z") 6 str == Just 6 + assert $ lastIndexOf' (Pattern "z") 7 str == Nothing + + log "length" + assert $ length "" == 0 + assert $ length "a" == 1 + assert $ length "ab" == 2 + assert $ length str == 7 + + log "singleton" + assert $ (singleton <$> codePointFromInt 0x30) == Just "0" + assert $ (singleton <$> codePointFromInt 0x16805) == Just "\x16805" + + log "splitAt" + let testSplitAt i s res = + assert $ case splitAt i s of + Nothing -> + isNothing res + Just { before, after } -> + maybe false (\r -> + r.before == before && r.after == after) res + + testSplitAt 0 "" $ Just {before: "", after: ""} + testSplitAt 1 "" Nothing + testSplitAt 0 "a" $ Just {before: "", after: "a"} + testSplitAt 1 "ab" $ Just {before: "a", after: "b"} + testSplitAt 3 "aabcc" $ Just {before: "aab", after: "cc"} + testSplitAt (-1) "abc" $ Nothing + testSplitAt 0 str $ Just {before: "", after: str} + testSplitAt 1 str $ Just {before: "a", after: "\xDC00\xD800\xD800\x16805\x16A06\&z"} + testSplitAt 2 str $ Just {before: "a\xDC00", after: "\xD800\xD800\x16805\x16A06\&z"} + testSplitAt 3 str $ Just {before: "a\xDC00\xD800", after: "\xD800\x16805\x16A06\&z"} + testSplitAt 4 str $ Just {before: "a\xDC00\xD800\xD800", after: "\x16805\x16A06\&z"} + testSplitAt 5 str $ Just {before: "a\xDC00\xD800\xD800\x16805", after: "\x16A06\&z"} + testSplitAt 6 str $ Just {before: "a\xDC00\xD800\xD800\x16805\x16A06", after: "z"} + testSplitAt 7 str $ Just {before: str, after: ""} + testSplitAt 8 str $ Nothing + + log "take" + assert $ take (-1) str == "" + assert $ take 0 str == "" + assert $ take 1 str == "a" + assert $ take 2 str == "a\xDC00" + assert $ take 3 str == "a\xDC00\xD800" + assert $ take 4 str == "a\xDC00\xD800\xD800" + assert $ take 5 str == "a\xDC00\xD800\xD800\x16805" + assert $ take 6 str == "a\xDC00\xD800\xD800\x16805\x16A06" + assert $ take 7 str == str + assert $ take 8 str == str + + log "takeWhile" + assert $ takeWhile (\c -> true) str == str + assert $ takeWhile (\c -> false) str == "" + assert $ takeWhile (\c -> codePointToInt c < 0xFFFF) str == "a\xDC00\xD800\xD800" + assert $ takeWhile (\c -> codePointToInt c < 0xDC00) str == "a" + + log "uncons" + let testUncons s res = + assert $ case uncons s of + Nothing -> + isNothing res + Just { head, tail } -> + maybe false (\r -> + r.head == codePointToInt head && r.tail == tail) res + + testUncons str $ Just {head: 0x61, tail: "\xDC00\xD800\xD800\x16805\x16A06\&z"} + testUncons (drop 1 str) $ Just {head: 0xDC00, tail: "\xD800\xD800\x16805\x16A06\&z"} + testUncons (drop 2 str) $ Just {head: 0xD800, tail: "\xD800\x16805\x16A06\&z"} + testUncons (drop 3 str) $ Just {head: 0xD800, tail: "\x16805\x16A06\&z"} + testUncons (drop 4 str) $ Just {head: 0x16805, tail: "\x16A06\&z"} + testUncons (drop 5 str) $ Just {head: 0x16A06, tail: "z"} + testUncons (drop 6 str) $ Just {head: 0x7A, tail: ""} + testUncons "" Nothing From 04154a5bee241c48f2a70f6417280e7f8ea587f4 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Sat, 27 May 2017 16:27:07 -0600 Subject: [PATCH 19/48] fix linting errors --- src/Data/String/CodePoints.js | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index c1d9163..c1c982a 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -1,11 +1,21 @@ -var hasArrayFrom = typeof Array.from === 'function'; +"use strict"; +/* global Symbol */ + +var hasArrayFrom = typeof Array.from === "function"; var hasStringIterator = - typeof Symbol !== 'undefined' && + typeof Symbol !== "undefined" && Symbol != null && - typeof Symbol.iterator !== 'undefined' && - typeof String.prototype[Symbol.iterator] === 'function'; -var hasFromCodePoint = typeof String.prototype.fromCodePoint === 'function'; -var hasCodePointAt = typeof String.prototype.codePointAt === 'function'; + typeof Symbol.iterator !== "undefined" && + typeof String.prototype[Symbol.iterator] === "function"; +var hasFromCodePoint = typeof String.prototype.fromCodePoint === "function"; +var hasCodePointAt = typeof String.prototype.codePointAt === "function"; + +function fromCodePoint(cp) { + if (cp <= 0xFFFF) return String.fromCharCode(cp); + var cu1 = String.fromCharCode(Math.floor((cp - 0x10000) / 0x400) + 0xD800); + var cu2 = String.fromCharCode((cp - 0x10000) % 0x400 + 0xDC00); + return cu1 + cu2; +} exports._codePointAt = function (fallback) { return function (Just) { @@ -23,7 +33,7 @@ exports._codePointAt = function (fallback) { for (var i = index;; --i) { var o = iter.next(); if (o.done) return Nothing; - if (i == 0) return Just(o.value); + if (i === 0) return Just(o.value); } } return fallback(index)(str); @@ -61,7 +71,7 @@ exports._count = function (isLead) { exports.fromCodePointArray = hasFromCodePoint ? function (cps) { return String.fromCodePoint.apply(String, cps); } - : function (cps) { return cps.map(fromCodePoint).join(''); }; + : function (cps) { return cps.map(fromCodePoint).join(""); }; exports.singleton = hasFromCodePoint ? String.fromCodePoint : fromCodePoint; @@ -69,7 +79,7 @@ exports._take = function (fallback) { return function (n) { if (hasArrayFrom) { return function (str) { - return Array.from(str).slice(0, Math.max(0, n)).join(''); + return Array.from(str).slice(0, Math.max(0, n)).join(""); }; } else if (hasStringIterator) { return function (str) { @@ -105,10 +115,3 @@ exports._toCodePointArray = function (fallback) { } return fallback; }; - -function fromCodePoint(cp) { - if (cp <= 0xFFFF) return String.fromCharCode(cp); - var cu1 = String.fromCharCode(Math.floor((cp - 0x10000) / 0x400) + 0xD800); - var cu2 = String.fromCharCode((cp - 0x10000) % 0x400 + 0xDC00); - return cu1 + cu2; -} From c798dfe490651b219b980664b5a5462cfd2430cf Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Sat, 27 May 2017 16:46:59 -0600 Subject: [PATCH 20/48] change re-export of Data.String --- src/Data/String.purs | 4 ++-- src/Data/String/CodePoints.purs | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Data/String.purs b/src/Data/String.purs index 22be682..89ad23c 100644 --- a/src/Data/String.purs +++ b/src/Data/String.purs @@ -36,11 +36,11 @@ module Data.String , joinWith ) where -import Prelude +import Prelude (class Ord, class Eq, class Show, Ordering(LT, EQ, GT), zero, one, (<<<), (<>), (==), ($), (-)) import Data.Maybe (Maybe(..), isJust) import Data.Newtype (class Newtype) -import Data.String.Unsafe as U +import Data.String.Unsafe (charAt) as U -- | A newtype used in cases where there is a string to be matched. newtype Pattern = Pattern String diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index f6b49ed..c968adc 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -27,7 +27,8 @@ import Data.List (List(Cons, Nil), fromFoldable) import Data.Maybe (Maybe(Just, Nothing)) import Data.Newtype (class Newtype) import Data.String as String -import Data.String hiding (count, drop, dropWhile, indexOf, indexOf', lastIndexOf, lastIndexOf', length, singleton, splitAt, take, takeWhile, uncons) as StringReExports +-- WARN: This list must be updated to re-export any exports added to Data.String. That makes me sad. +import Data.String (Pattern(..), Replacement(..), charAt, charCodeAt, contains, fromCharArray, joinWith, localeCompare, null, replace, replaceAll, split, stripPrefix, stripSuffix, toChar, toCharArray, toLower, toUpper, trim) as StringReExports import Data.Tuple (Tuple(Tuple)) import Data.Unfoldable (unfoldr) import Prelude (class Eq, class Ord, (&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<)) From 71cdcf299d1acb32e702b4055fe65a721fa171b6 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Sun, 28 May 2017 14:45:12 -0600 Subject: [PATCH 21/48] bugfixes --- src/Data/String/CodePoints.js | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index c1c982a..7c1257c 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -17,6 +17,15 @@ function fromCodePoint(cp) { return cu1 + cu2; } +var codePointAt0 = hasCodePointAt + ? function (str) { return str.codePointAt(0); } + : function (str) { + if (str.length === 1) { + return str.charCodeAt(0); + } + return ((str.charCodeAt(0) - 0xD800) * 0x400 + (str.charCodeAt(1) - 0xDC00) + 0x10000); + }; + exports._codePointAt = function (fallback) { return function (Just) { return function (Nothing) { @@ -24,16 +33,16 @@ exports._codePointAt = function (fallback) { return function (str) { var length = str.length; if (index < 0 || index >= length) return Nothing; - if (hasArrayFrom && hasCodePointAt) { + if (hasArrayFrom) { var cps = Array.from(str); if (index >= cps.length) return Nothing; - return Just(cps[index].codePointAt(0)); + return Just(codePointAt0(cps[index])); } else if (hasStringIterator) { var iter = str[Symbol.iterator](); for (var i = index;; --i) { var o = iter.next(); if (o.done) return Nothing; - if (i === 0) return Just(o.value); + if (i === 0) return Just(codePointAt0(o.value)); } } return fallback(index)(str); @@ -70,6 +79,7 @@ exports._count = function (isLead) { }; exports.fromCodePointArray = hasFromCodePoint + // TODO: using F.p.apply here will fail for very large strings; use alternative implementation for very large strings ? function (cps) { return String.fromCodePoint.apply(String, cps); } : function (cps) { return cps.map(fromCodePoint).join(""); }; @@ -93,23 +103,23 @@ exports._take = function (fallback) { return accum; }; } - return fallback; + return fallback(n); }; }; exports._toCodePointArray = function (fallback) { - if (hasArrayFrom && hasCodePointAt) { + if (hasArrayFrom) { return function (str) { - return Array.from(str, function (x) { return x.codePointAt(0); }); + return Array.from(str, codePointAt0); }; - } else if (hasStringIterator && hasCodePointAt) { + } else if (hasStringIterator) { return function (str) { var accum = []; var iter = str[Symbol.iterator](); for (;;) { var o = iter.next(); if (o.done) return accum; - accum.push(o.value.codePointAt(0)); + accum.push(codePointAt0(o.value)); } }; } From 2c2418aa41cf547b53c210361c06a1f62faaae1c Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Sun, 28 May 2017 15:33:18 -0600 Subject: [PATCH 22/48] move fromCodePoint from JS to purs --- src/Data/String/CodePoints.js | 21 +++++++++------------ src/Data/String/CodePoints.purs | 32 +++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 7c1257c..2ae2481 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -10,13 +10,6 @@ var hasStringIterator = var hasFromCodePoint = typeof String.prototype.fromCodePoint === "function"; var hasCodePointAt = typeof String.prototype.codePointAt === "function"; -function fromCodePoint(cp) { - if (cp <= 0xFFFF) return String.fromCharCode(cp); - var cu1 = String.fromCharCode(Math.floor((cp - 0x10000) / 0x400) + 0xD800); - var cu2 = String.fromCharCode((cp - 0x10000) % 0x400 + 0xDC00); - return cu1 + cu2; -} - var codePointAt0 = hasCodePointAt ? function (str) { return str.codePointAt(0); } : function (str) { @@ -78,12 +71,16 @@ exports._count = function (isLead) { }; }; -exports.fromCodePointArray = hasFromCodePoint - // TODO: using F.p.apply here will fail for very large strings; use alternative implementation for very large strings - ? function (cps) { return String.fromCodePoint.apply(String, cps); } - : function (cps) { return cps.map(fromCodePoint).join(""); }; +exports._fromCodePointArray = function (singleton) { + return hasFromCodePoint + // TODO: using F.p.apply here will fail for very large strings; use alternative implementation for very large strings + ? function (cps) { return String.fromCodePoint.apply(String, cps); } + : function (cps) { return cps.map(singleton).join(""); }; +}; -exports.singleton = hasFromCodePoint ? String.fromCodePoint : fromCodePoint; +exports._singleton = function (fallback) { + return hasFromCodePoint ? String.fromCodePoint : fallback; +}; exports._take = function (fallback) { return function (n) { diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index c968adc..eaccf9d 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -22,7 +22,7 @@ module Data.String.CodePoints ) where import Data.Array as Array -import Data.Char (toCharCode) +import Data.Char as Char import Data.List (List(Cons, Nil), fromFoldable) import Data.Maybe (Maybe(Just, Nothing)) import Data.Newtype (class Newtype) @@ -31,7 +31,7 @@ import Data.String as String import Data.String (Pattern(..), Replacement(..), charAt, charCodeAt, contains, fromCharArray, joinWith, localeCompare, null, replace, replaceAll, split, stripPrefix, stripSuffix, toChar, toCharArray, toLower, toUpper, trim) as StringReExports import Data.Tuple (Tuple(Tuple)) import Data.Unfoldable (unfoldr) -import Prelude (class Eq, class Ord, (&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<)) +import Prelude (class Eq, class Ord, (&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<), (/), (<>), mod) newtype CodePoint = CodePoint Int @@ -61,6 +61,9 @@ isLead cu = 0xD800 <= cu && cu <= 0xDBFF isTrail :: Int -> Boolean isTrail cu = 0xDC00 <= cu && cu <= 0xDFFF +fromCharCode :: Int -> String +fromCharCode = String.singleton <<< Char.fromCharCode + codePointAt :: Int -> String -> Maybe CodePoint codePointAt = _codePointAt codePointAtFallback Just Nothing @@ -97,7 +100,13 @@ dropWhile :: (CodePoint -> Boolean) -> String -> String dropWhile p s = drop (count p s) s -foreign import fromCodePointArray :: Array CodePoint -> String +fromCodePointArray :: Array CodePoint -> String +fromCodePointArray = _fromCodePointArray singletonFallback + +foreign import _fromCodePointArray + :: (CodePoint -> String) + -> Array CodePoint + -> String indexOf :: String.Pattern -> String -> Maybe Int @@ -124,7 +133,20 @@ length :: String -> Int length = Array.length <<< toCodePointArray -foreign import singleton :: CodePoint -> String +singleton :: CodePoint -> String +singleton = _singleton singletonFallback + +foreign import _singleton + :: (CodePoint -> String) + -> CodePoint + -> String + +singletonFallback :: CodePoint -> String +singletonFallback (CodePoint cp) | cp <= 0xFFFF = fromCharCode cp +singletonFallback (CodePoint cp) = fromCharCode lead <> fromCharCode trail + where + lead = ((cp - 0x10000) / 0x400) + 0xD800 + trail = (cp - 0x10000) `mod` 0x400 + 0xDC00 splitAt :: Int -> String -> Maybe { before :: String, after :: String } @@ -160,7 +182,7 @@ foreign import _toCodePointArray -> Array CodePoint toCodePointArrayFallback :: String -> Array CodePoint -toCodePointArrayFallback s = unfoldr decode (fromFoldable (toCharCode <$> String.toCharArray s)) +toCodePointArrayFallback s = unfoldr decode (fromFoldable (Char.toCharCode <$> String.toCharArray s)) where decode :: List Int -> Maybe (Tuple CodePoint (List Int)) decode (Cons h (Cons l rest)) | isLead h && isTrail l From 46e9545c0d3d3c3168097131f770baf1e7e7995b Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Sun, 28 May 2017 15:54:08 -0600 Subject: [PATCH 23/48] move codePointAt0 from JS to purs --- src/Data/String/CodePoints.js | 85 ++++++++++++++++++--------------- src/Data/String/CodePoints.purs | 31 ++++++++++-- 2 files changed, 72 insertions(+), 44 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 2ae2481..58117af 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -10,35 +10,40 @@ var hasStringIterator = var hasFromCodePoint = typeof String.prototype.fromCodePoint === "function"; var hasCodePointAt = typeof String.prototype.codePointAt === "function"; -var codePointAt0 = hasCodePointAt - ? function (str) { return str.codePointAt(0); } - : function (str) { - if (str.length === 1) { - return str.charCodeAt(0); - } - return ((str.charCodeAt(0) - 0xD800) * 0x400 + (str.charCodeAt(1) - 0xDC00) + 0x10000); +exports._unsafeCharCodeAt = function (i) { + return function (str) { + return str.charCodeAt(i); }; +}; + +exports._unsafeCodePointAt0 = function (fallback) { + return hasCodePointAt + ? function (str) { return str.codePointAt(0); } + : fallback; +}; exports._codePointAt = function (fallback) { return function (Just) { return function (Nothing) { - return function (index) { - return function (str) { - var length = str.length; - if (index < 0 || index >= length) return Nothing; - if (hasArrayFrom) { - var cps = Array.from(str); - if (index >= cps.length) return Nothing; - return Just(codePointAt0(cps[index])); - } else if (hasStringIterator) { - var iter = str[Symbol.iterator](); - for (var i = index;; --i) { - var o = iter.next(); - if (o.done) return Nothing; - if (i === 0) return Just(codePointAt0(o.value)); + return function (unsafeCodePointAt0) { + return function (index) { + return function (str) { + var length = str.length; + if (index < 0 || index >= length) return Nothing; + if (hasArrayFrom) { + var cps = Array.from(str); + if (index >= cps.length) return Nothing; + return Just(unsafeCodePointAt0(cps[index])); + } else if (hasStringIterator) { + var iter = str[Symbol.iterator](); + for (var i = index;; --i) { + var o = iter.next(); + if (o.done) return Nothing; + if (i === 0) return Just(unsafeCodePointAt0(o.value)); + } } - } - return fallback(index)(str); + return fallback(index)(str); + }; }; }; }; @@ -105,20 +110,22 @@ exports._take = function (fallback) { }; exports._toCodePointArray = function (fallback) { - if (hasArrayFrom) { - return function (str) { - return Array.from(str, codePointAt0); - }; - } else if (hasStringIterator) { - return function (str) { - var accum = []; - var iter = str[Symbol.iterator](); - for (;;) { - var o = iter.next(); - if (o.done) return accum; - accum.push(codePointAt0(o.value)); - } - }; - } - return fallback; + return function (unsafeCodePointAt0) { + if (hasArrayFrom) { + return function (str) { + return Array.from(str, unsafeCodePointAt0); + }; + } else if (hasStringIterator) { + return function (str) { + var accum = []; + var iter = str[Symbol.iterator](); + for (;;) { + var o = iter.next(); + if (o.done) return accum; + accum.push(unsafeCodePointAt0(o.value)); + } + }; + } + return fallback; + }; }; diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index eaccf9d..ab14ccb 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -31,7 +31,7 @@ import Data.String as String import Data.String (Pattern(..), Replacement(..), charAt, charCodeAt, contains, fromCharArray, joinWith, localeCompare, null, replace, replaceAll, split, stripPrefix, stripSuffix, toChar, toCharArray, toLower, toUpper, trim) as StringReExports import Data.Tuple (Tuple(Tuple)) import Data.Unfoldable (unfoldr) -import Prelude (class Eq, class Ord, (&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<), (/), (<>), mod) +import Prelude (class Eq, class Ord, (&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<), (/), (<>), (==), mod) newtype CodePoint = CodePoint Int @@ -48,8 +48,8 @@ codePointToInt :: CodePoint -> Int codePointToInt (CodePoint n) = n codePointFromSurrogatePair :: Int -> Int -> Maybe CodePoint -codePointFromSurrogatePair lead trail | isLead lead && isTrail trail - = Just (unsurrogate lead trail) +codePointFromSurrogatePair lead trail | isLead lead && isTrail trail = + Just (unsurrogate lead trail) codePointFromSurrogatePair _ _ = Nothing unsurrogate :: Int -> Int -> CodePoint @@ -64,14 +64,34 @@ isTrail cu = 0xDC00 <= cu && cu <= 0xDFFF fromCharCode :: Int -> String fromCharCode = String.singleton <<< Char.fromCharCode +unsafeCodePointAt0 :: String -> CodePoint +unsafeCodePointAt0 = _unsafeCodePointAt0 unsafeCodePointAt0Fallback + +foreign import _unsafeCodePointAt0 + :: (String -> CodePoint) + -> String + -> CodePoint + +unsafeCodePointAt0Fallback :: String -> CodePoint +unsafeCodePointAt0Fallback s | String.length s == 1 = CodePoint (_unsafeCharCodeAt 0 s) +unsafeCodePointAt0Fallback s = CodePoint (((lead - 0xD800) * 0x400) + (trail - 0xDC00) + 0x10000) + where + lead = _unsafeCharCodeAt 0 s + trail = _unsafeCharCodeAt 1 s + +foreign import _unsafeCharCodeAt :: Int -> String -> Int + codePointAt :: Int -> String -> Maybe CodePoint -codePointAt = _codePointAt codePointAtFallback Just Nothing +codePointAt 0 "" = Nothing +codePointAt 0 s = Just (unsafeCodePointAt0 s) +codePointAt n s = _codePointAt codePointAtFallback Just Nothing unsafeCodePointAt0 n s foreign import _codePointAt :: (Int -> String -> Maybe CodePoint) -> (forall a. a -> Maybe a) -> (forall a. Maybe a) + -> (String -> CodePoint) -> Int -> String -> Maybe CodePoint @@ -174,10 +194,11 @@ takeWhile p s = take (count p s) s toCodePointArray :: String -> Array CodePoint -toCodePointArray = _toCodePointArray toCodePointArrayFallback +toCodePointArray = _toCodePointArray toCodePointArrayFallback unsafeCodePointAt0 foreign import _toCodePointArray :: (String -> Array CodePoint) + -> (String -> CodePoint) -> String -> Array CodePoint From c59f34028a1cd7ea9de96d732c938d27e5ea3183 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Mon, 29 May 2017 11:30:46 -0700 Subject: [PATCH 24/48] remove TODOs --- src/Data/String/CodePoints.js | 8 ++++++-- test/Test/Data/String/CodePoints.purs | 5 ----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 58117af..1875fab 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -78,8 +78,12 @@ exports._count = function (isLead) { exports._fromCodePointArray = function (singleton) { return hasFromCodePoint - // TODO: using F.p.apply here will fail for very large strings; use alternative implementation for very large strings - ? function (cps) { return String.fromCodePoint.apply(String, cps); } + ? function (cps) { + if (cps.length < 10240) { + return String.fromCodePoint.apply(String, cps); + } + return cps.map(singleton).join(""); + } : function (cps) { return cps.map(singleton).join(""); }; }; diff --git a/test/Test/Data/String/CodePoints.purs b/test/Test/Data/String/CodePoints.purs index d7dac6a..46dcc9f 100644 --- a/test/Test/Data/String/CodePoints.purs +++ b/test/Test/Data/String/CodePoints.purs @@ -66,11 +66,6 @@ testStringCodePoints = do assert $ indexOf (Pattern "z") str == Just 6 assert $ indexOf (Pattern "\0") str == Nothing assert $ indexOf (Pattern "\xD81A") str == Just 4 - -- TODO: Should this be Nothing? It matches the trail surrogate of a surrogate pair. - -- It'd be nice if (drop (indexOf pattern str) str) was guaranteed to start with pattern. - -- If we change this, we'll also need to add a matching contains implementation to the CodePoints module. - -- I vote we just delete the test. Passing surrogate halves to the CodePoints functions should not be supported. - assert $ indexOf (Pattern "\xDC05") str == Just 5 log "indexOf'" assert $ indexOf' (Pattern "") 0 "" == Just 0 From 71c5156e74a5569d81c9b43906dbc111e25e4c36 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Wed, 31 May 2017 20:56:23 -0700 Subject: [PATCH 25/48] use charCodeAt from Data.String.Unsafe --- src/Data/String/CodePoints.js | 6 ------ src/Data/String/CodePoints.purs | 9 ++++----- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 1875fab..1a9b534 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -10,12 +10,6 @@ var hasStringIterator = var hasFromCodePoint = typeof String.prototype.fromCodePoint === "function"; var hasCodePointAt = typeof String.prototype.codePointAt === "function"; -exports._unsafeCharCodeAt = function (i) { - return function (str) { - return str.charCodeAt(i); - }; -}; - exports._unsafeCodePointAt0 = function (fallback) { return hasCodePointAt ? function (str) { return str.codePointAt(0); } diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index ab14ccb..2ab7f3f 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -27,6 +27,7 @@ import Data.List (List(Cons, Nil), fromFoldable) import Data.Maybe (Maybe(Just, Nothing)) import Data.Newtype (class Newtype) import Data.String as String +import Data.String.Unsafe as Unsafe -- WARN: This list must be updated to re-export any exports added to Data.String. That makes me sad. import Data.String (Pattern(..), Replacement(..), charAt, charCodeAt, contains, fromCharArray, joinWith, localeCompare, null, replace, replaceAll, split, stripPrefix, stripSuffix, toChar, toCharArray, toLower, toUpper, trim) as StringReExports import Data.Tuple (Tuple(Tuple)) @@ -73,13 +74,11 @@ foreign import _unsafeCodePointAt0 -> CodePoint unsafeCodePointAt0Fallback :: String -> CodePoint -unsafeCodePointAt0Fallback s | String.length s == 1 = CodePoint (_unsafeCharCodeAt 0 s) +unsafeCodePointAt0Fallback s | String.length s == 1 = CodePoint (Unsafe.charCodeAt 0 s) unsafeCodePointAt0Fallback s = CodePoint (((lead - 0xD800) * 0x400) + (trail - 0xDC00) + 0x10000) where - lead = _unsafeCharCodeAt 0 s - trail = _unsafeCharCodeAt 1 s - -foreign import _unsafeCharCodeAt :: Int -> String -> Int + lead = Unsafe.charCodeAt 0 s + trail = Unsafe.charCodeAt 1 s codePointAt :: Int -> String -> Maybe CodePoint From e8ca6f36278fcce565d83a313f4e607ffb2b52bc Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Sun, 4 Jun 2017 21:32:23 -0700 Subject: [PATCH 26/48] open imports for Prelude --- src/Data/String.purs | 2 +- src/Data/String/CodePoints.purs | 3 ++- test/Test/Data/String/CodePoints.purs | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Data/String.purs b/src/Data/String.purs index 89ad23c..2bd2c94 100644 --- a/src/Data/String.purs +++ b/src/Data/String.purs @@ -36,7 +36,7 @@ module Data.String , joinWith ) where -import Prelude (class Ord, class Eq, class Show, Ordering(LT, EQ, GT), zero, one, (<<<), (<>), (==), ($), (-)) +import Prelude import Data.Maybe (Maybe(..), isJust) import Data.Newtype (class Newtype) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 2ab7f3f..71ffa85 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -21,6 +21,8 @@ module Data.String.CodePoints , uncons ) where +import Prelude + import Data.Array as Array import Data.Char as Char import Data.List (List(Cons, Nil), fromFoldable) @@ -32,7 +34,6 @@ import Data.String.Unsafe as Unsafe import Data.String (Pattern(..), Replacement(..), charAt, charCodeAt, contains, fromCharArray, joinWith, localeCompare, null, replace, replaceAll, split, stripPrefix, stripSuffix, toChar, toCharArray, toLower, toUpper, trim) as StringReExports import Data.Tuple (Tuple(Tuple)) import Data.Unfoldable (unfoldr) -import Prelude (class Eq, class Ord, (&&), (||), (*), (+), (-), (<$>), (<), (<=), (<<<), (/), (<>), (==), mod) newtype CodePoint = CodePoint Int diff --git a/test/Test/Data/String/CodePoints.purs b/test/Test/Data/String/CodePoints.purs index 46dcc9f..f6844cd 100644 --- a/test/Test/Data/String/CodePoints.purs +++ b/test/Test/Data/String/CodePoints.purs @@ -1,6 +1,6 @@ module Test.Data.String.CodePoints (testStringCodePoints) where -import Prelude (Unit, discard, negate, (==), ($), (&&), (<), (<$>)) +import Prelude import Control.Monad.Eff (Eff) import Control.Monad.Eff.Console (CONSOLE, log) From 8d6d263bac2ddae7c605bda329adf55854cf10e2 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Mon, 5 Jun 2017 00:09:52 -0700 Subject: [PATCH 27/48] add some comments --- src/Data/String/CodePoints.js | 2 ++ src/Data/String/CodePoints.purs | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 1a9b534..34b7aa6 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -73,6 +73,8 @@ exports._count = function (isLead) { exports._fromCodePointArray = function (singleton) { return hasFromCodePoint ? function (cps) { + // Function.prototype.apply will fail for very large second parameters, + // so we don't use it for arrays with 10KB or more entries. if (cps.length < 10240) { return String.fromCodePoint.apply(String, cps); } diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 71ffa85..dbc4294 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -1,3 +1,7 @@ +-- | These functions allow PureScript strings to be treated as if they were +-- | sequences of Unicode code points instead of their true underlying +-- | implementation (sequences of UTF-16 code units). For nearly all uses of +-- | strings, these functions should be preferred over the ones in Data.String. module Data.String.CodePoints ( module StringReExports , CodePoint() @@ -36,12 +40,17 @@ import Data.Tuple (Tuple(Tuple)) import Data.Unfoldable (unfoldr) +-- | CodePoint is an Int bounded between 0 and 0x10FFFF, corresponding to +-- | Unicode code points. newtype CodePoint = CodePoint Int derive instance eqCodePoint :: Eq CodePoint derive instance ordCodePoint :: Ord CodePoint derive instance newtypeCodePoint :: Newtype CodePoint _ +-- I would prefer that this smart constructor not need to exist and instead +-- CodePoint just implements Enum, but the Enum module already depends on this +-- one. To avoid the circular dependency, we just expose these two functions. codePointFromInt :: Int -> Maybe CodePoint codePointFromInt n | 0 <= n && n <= 0x10FFFF = Just (CodePoint n) codePointFromInt n = Nothing From 8e99c3996dcfe5349b73a7b7d87f1237cb3068ae Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Mon, 5 Jun 2017 00:11:23 -0700 Subject: [PATCH 28/48] remove unused parameters --- test/Test/Data/String/CodePoints.purs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/Test/Data/String/CodePoints.purs b/test/Test/Data/String/CodePoints.purs index f6844cd..457ba80 100644 --- a/test/Test/Data/String/CodePoints.purs +++ b/test/Test/Data/String/CodePoints.purs @@ -46,8 +46,8 @@ testStringCodePoints = do assert $ drop 8 str == "" log "dropWhile" - assert $ dropWhile (\c -> true) str == "" - assert $ dropWhile (\c -> false) str == str + assert $ dropWhile (\_ -> true) str == "" + assert $ dropWhile (\_ -> false) str == str assert $ dropWhile (\c -> codePointToInt c < 0xFFFF) str == "\x16805\x16A06\&z" assert $ dropWhile (\c -> codePointToInt c < 0xDC00) str == "\xDC00\xD800\xD800\x16805\x16A06\&z" @@ -161,8 +161,8 @@ testStringCodePoints = do assert $ take 8 str == str log "takeWhile" - assert $ takeWhile (\c -> true) str == str - assert $ takeWhile (\c -> false) str == "" + assert $ takeWhile (\_ -> true) str == str + assert $ takeWhile (\_ -> false) str == "" assert $ takeWhile (\c -> codePointToInt c < 0xFFFF) str == "a\xDC00\xD800\xD800" assert $ takeWhile (\c -> codePointToInt c < 0xDC00) str == "a" From 557186c275894defa4a74d6ff2aa0dc09ad5b448 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Mon, 5 Jun 2017 00:17:54 -0700 Subject: [PATCH 29/48] remove some redundant JS implementations --- src/Data/String/CodePoints.js | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 34b7aa6..aefe6a6 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -80,7 +80,9 @@ exports._fromCodePointArray = function (singleton) { } return cps.map(singleton).join(""); } - : function (cps) { return cps.map(singleton).join(""); }; + : function (cps) { + return cps.map(singleton).join(""); + }; }; exports._singleton = function (fallback) { @@ -89,11 +91,7 @@ exports._singleton = function (fallback) { exports._take = function (fallback) { return function (n) { - if (hasArrayFrom) { - return function (str) { - return Array.from(str).slice(0, Math.max(0, n)).join(""); - }; - } else if (hasStringIterator) { + if (hasStringIterator) { return function (str) { var accum = ""; var iter = str[Symbol.iterator](); @@ -115,16 +113,6 @@ exports._toCodePointArray = function (fallback) { return function (str) { return Array.from(str, unsafeCodePointAt0); }; - } else if (hasStringIterator) { - return function (str) { - var accum = []; - var iter = str[Symbol.iterator](); - for (;;) { - var o = iter.next(); - if (o.done) return accum; - accum.push(unsafeCodePointAt0(o.value)); - } - }; } return fallback; }; From 4ec116b76cc856bac1d990632836b16bbfcf6246 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Mon, 5 Jun 2017 00:32:13 -0700 Subject: [PATCH 30/48] remove unnecessary qualification in import --- src/Data/String.purs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/String.purs b/src/Data/String.purs index 2bd2c94..22be682 100644 --- a/src/Data/String.purs +++ b/src/Data/String.purs @@ -40,7 +40,7 @@ import Prelude import Data.Maybe (Maybe(..), isJust) import Data.Newtype (class Newtype) -import Data.String.Unsafe (charAt) as U +import Data.String.Unsafe as U -- | A newtype used in cases where there is a string to be matched. newtype Pattern = Pattern String From 5490d4679e4ce9e8ad4b52a3ad37bebeaa389474 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Mon, 12 Jun 2017 09:48:01 -0700 Subject: [PATCH 31/48] prefer 10e3 over 1024e1 --- src/Data/String/CodePoints.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index aefe6a6..87fb2c5 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -74,8 +74,8 @@ exports._fromCodePointArray = function (singleton) { return hasFromCodePoint ? function (cps) { // Function.prototype.apply will fail for very large second parameters, - // so we don't use it for arrays with 10KB or more entries. - if (cps.length < 10240) { + // so we don't use it for arrays with 10,000 or more entries. + if (cps.length < 10e3) { return String.fromCodePoint.apply(String, cps); } return cps.map(singleton).join(""); From af2db11877de7ecac511ed35c9677560540bfa4b Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 23 Jun 2017 08:42:37 -0700 Subject: [PATCH 32/48] prefer string iteration over Array.from in _codePointAt FFI function --- src/Data/String/CodePoints.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 87fb2c5..041e275 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -24,17 +24,17 @@ exports._codePointAt = function (fallback) { return function (str) { var length = str.length; if (index < 0 || index >= length) return Nothing; - if (hasArrayFrom) { - var cps = Array.from(str); - if (index >= cps.length) return Nothing; - return Just(unsafeCodePointAt0(cps[index])); - } else if (hasStringIterator) { + if (hasStringIterator) { var iter = str[Symbol.iterator](); for (var i = index;; --i) { var o = iter.next(); if (o.done) return Nothing; if (i === 0) return Just(unsafeCodePointAt0(o.value)); } + } else if (hasArrayFrom) { + var cps = Array.from(str); + if (index >= cps.length) return Nothing; + return Just(unsafeCodePointAt0(cps[index])); } return fallback(index)(str); }; From 205838c84190ec300c6d8d1bca6ec1a3fd9990c6 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Tue, 4 Jul 2017 11:48:26 -0700 Subject: [PATCH 33/48] remove Newtype instance for CodePoint --- src/Data/String/CodePoints.purs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index dbc4294..10b3542 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -31,7 +31,6 @@ import Data.Array as Array import Data.Char as Char import Data.List (List(Cons, Nil), fromFoldable) import Data.Maybe (Maybe(Just, Nothing)) -import Data.Newtype (class Newtype) import Data.String as String import Data.String.Unsafe as Unsafe -- WARN: This list must be updated to re-export any exports added to Data.String. That makes me sad. @@ -46,7 +45,6 @@ newtype CodePoint = CodePoint Int derive instance eqCodePoint :: Eq CodePoint derive instance ordCodePoint :: Ord CodePoint -derive instance newtypeCodePoint :: Newtype CodePoint _ -- I would prefer that this smart constructor not need to exist and instead -- CodePoint just implements Enum, but the Enum module already depends on this From 3b57fd4fa775fbb9baf65471e5b03e3ab572ecfc Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Tue, 4 Jul 2017 12:48:06 -0700 Subject: [PATCH 34/48] remove duplication --- src/Data/String/CodePoints.purs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 10b3542..fb926dd 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -62,7 +62,7 @@ codePointFromSurrogatePair lead trail | isLead lead && isTrail trail = codePointFromSurrogatePair _ _ = Nothing unsurrogate :: Int -> Int -> CodePoint -unsurrogate h l = CodePoint ((h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000) +unsurrogate lead trail = CodePoint ((lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000) isLead :: Int -> Boolean isLead cu = 0xD800 <= cu && cu <= 0xDBFF @@ -83,7 +83,7 @@ foreign import _unsafeCodePointAt0 unsafeCodePointAt0Fallback :: String -> CodePoint unsafeCodePointAt0Fallback s | String.length s == 1 = CodePoint (Unsafe.charCodeAt 0 s) -unsafeCodePointAt0Fallback s = CodePoint (((lead - 0xD800) * 0x400) + (trail - 0xDC00) + 0x10000) +unsafeCodePointAt0Fallback s = CodePoint (unsurrogate lead trail) where lead = Unsafe.charCodeAt 0 s trail = Unsafe.charCodeAt 1 s From 7eac69e2efcd59d17a8bfcdf570954fc3bdd5f3b Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Tue, 4 Jul 2017 12:50:57 -0700 Subject: [PATCH 35/48] remove unused function --- src/Data/String/CodePoints.purs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index fb926dd..acca533 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -56,11 +56,6 @@ codePointFromInt n = Nothing codePointToInt :: CodePoint -> Int codePointToInt (CodePoint n) = n -codePointFromSurrogatePair :: Int -> Int -> Maybe CodePoint -codePointFromSurrogatePair lead trail | isLead lead && isTrail trail = - Just (unsurrogate lead trail) -codePointFromSurrogatePair _ _ = Nothing - unsurrogate :: Int -> Int -> CodePoint unsurrogate lead trail = CodePoint ((lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000) From cde0d26213ceec0b9083eb04709513f278bf5579 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Wed, 5 Jul 2017 09:49:31 -0700 Subject: [PATCH 36/48] bug fix for unsafeCodePointAt0Fallback --- src/Data/String/CodePoints.purs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index acca533..ed0bff5 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -77,11 +77,13 @@ foreign import _unsafeCodePointAt0 -> CodePoint unsafeCodePointAt0Fallback :: String -> CodePoint -unsafeCodePointAt0Fallback s | String.length s == 1 = CodePoint (Unsafe.charCodeAt 0 s) -unsafeCodePointAt0Fallback s = CodePoint (unsurrogate lead trail) +unsafeCodePointAt0Fallback s = + if isLead cu0 && isTrail cu1 + then unsurrogate cu0 cu1 + else CodePoint cu0 where - lead = Unsafe.charCodeAt 0 s - trail = Unsafe.charCodeAt 1 s + cu0 = Unsafe.charCodeAt 0 s + cu1 = Unsafe.charCodeAt 1 s codePointAt :: Int -> String -> Maybe CodePoint From 4292a8bad00ff9b0ea896268cf4b5a7f489f2d01 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Wed, 5 Jul 2017 09:51:48 -0700 Subject: [PATCH 37/48] consistent code unit variable names --- src/Data/String/CodePoints.purs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index ed0bff5..8ceb5d7 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -210,9 +210,9 @@ toCodePointArrayFallback :: String -> Array CodePoint toCodePointArrayFallback s = unfoldr decode (fromFoldable (Char.toCharCode <$> String.toCharArray s)) where decode :: List Int -> Maybe (Tuple CodePoint (List Int)) - decode (Cons h (Cons l rest)) | isLead h && isTrail l - = Just (Tuple (unsurrogate h l) rest) - decode (Cons c rest) = Just (Tuple (CodePoint c) rest) + decode (Cons cu0 (Cons cu1 rest)) | isLead cu0 && isTrail cu1 + = Just (Tuple (unsurrogate cu0 cu1) rest) + decode (Cons cu rest) = Just (Tuple (CodePoint cu) rest) decode Nil = Nothing From 0d81e0b4207a37f0173d2864d925f70052250d28 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Thu, 6 Jul 2017 09:45:28 -0700 Subject: [PATCH 38/48] bug fix lastIndexOf' --- src/Data/String/CodePoints.purs | 4 ++-- test/Test/Data/String/CodePoints.purs | 31 +++++++++++++++++++-------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 8ceb5d7..e7bb424 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -149,8 +149,8 @@ lastIndexOf p s = (\i -> length (String.take i s)) <$> String.lastIndexOf p s lastIndexOf' :: String.Pattern -> Int -> String -> Maybe Int lastIndexOf' p i s = - let s' = drop i s in - (\k -> i + length (String.take k s')) <$> String.lastIndexOf p s' + let i' = String.length (take i s) in + (\k -> length (String.take k s)) <$> String.lastIndexOf' p i' s length :: String -> Int diff --git a/test/Test/Data/String/CodePoints.purs b/test/Test/Data/String/CodePoints.purs index 457ba80..7a6aef0 100644 --- a/test/Test/Data/String/CodePoints.purs +++ b/test/Test/Data/String/CodePoints.purs @@ -101,17 +101,30 @@ testStringCodePoints = do log "lastIndexOf'" assert $ lastIndexOf' (Pattern "") 0 "" == Just 0 assert $ lastIndexOf' (Pattern str) 0 str == Just 0 - assert $ lastIndexOf' (Pattern str) 1 str == Nothing + assert $ lastIndexOf' (Pattern str) 1 str == Just 0 assert $ lastIndexOf' (Pattern "a") 0 str == Just 0 - assert $ lastIndexOf' (Pattern "a") 1 str == Nothing - assert $ lastIndexOf' (Pattern "z") 0 str == Just 6 - assert $ lastIndexOf' (Pattern "z") 1 str == Just 6 - assert $ lastIndexOf' (Pattern "z") 2 str == Just 6 - assert $ lastIndexOf' (Pattern "z") 3 str == Just 6 - assert $ lastIndexOf' (Pattern "z") 4 str == Just 6 - assert $ lastIndexOf' (Pattern "z") 5 str == Just 6 + assert $ lastIndexOf' (Pattern "a") 7 str == Just 0 + assert $ lastIndexOf' (Pattern "z") 0 str == Nothing + assert $ lastIndexOf' (Pattern "z") 1 str == Nothing + assert $ lastIndexOf' (Pattern "z") 2 str == Nothing + assert $ lastIndexOf' (Pattern "z") 3 str == Nothing + assert $ lastIndexOf' (Pattern "z") 4 str == Nothing + assert $ lastIndexOf' (Pattern "z") 5 str == Nothing assert $ lastIndexOf' (Pattern "z") 6 str == Just 6 - assert $ lastIndexOf' (Pattern "z") 7 str == Nothing + assert $ lastIndexOf' (Pattern "z") 7 str == Just 6 + assert $ lastIndexOf' (Pattern "\xD800") 7 str == Just 3 + assert $ lastIndexOf' (Pattern "\xD800") 6 str == Just 3 + assert $ lastIndexOf' (Pattern "\xD800") 5 str == Just 3 + assert $ lastIndexOf' (Pattern "\xD800") 4 str == Just 3 + assert $ lastIndexOf' (Pattern "\xD800") 3 str == Just 3 + assert $ lastIndexOf' (Pattern "\xD800") 2 str == Just 2 + assert $ lastIndexOf' (Pattern "\xD800") 1 str == Nothing + assert $ lastIndexOf' (Pattern "\xD800") 0 str == Nothing + assert $ lastIndexOf' (Pattern "\x16A06") 7 str == Just 5 + assert $ lastIndexOf' (Pattern "\x16A06") 6 str == Just 5 + assert $ lastIndexOf' (Pattern "\x16A06") 5 str == Just 5 + assert $ lastIndexOf' (Pattern "\x16A06") 4 str == Nothing + assert $ lastIndexOf' (Pattern "\x16A06") 3 str == Nothing log "length" assert $ length "" == 0 From 370af7cae24407f339d1f9e1fdbcd0eabd7d27e2 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Thu, 6 Jul 2017 10:07:13 -0700 Subject: [PATCH 39/48] add comments and complexity notes --- src/Data/String/CodePoints.purs | 46 ++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index e7bb424..31b7747 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -68,6 +68,7 @@ isTrail cu = 0xDC00 <= cu && cu <= 0xDFFF fromCharCode :: Int -> String fromCharCode = String.singleton <<< Char.fromCharCode +-- WARN: this function expects the String parameter to be non-empty unsafeCodePointAt0 :: String -> CodePoint unsafeCodePointAt0 = _unsafeCodePointAt0 unsafeCodePointAt0Fallback @@ -86,6 +87,9 @@ unsafeCodePointAt0Fallback s = cu1 = Unsafe.charCodeAt 1 s +-- | Returns the first code point of the string after dropping the given number +-- | of code points from the beginning, if there is such a code point. Operates +-- | in constant space and in time linear to `n`. codePointAt :: Int -> String -> Maybe CodePoint codePointAt 0 "" = Nothing codePointAt 0 s = Just (unsafeCodePointAt0 s) @@ -104,6 +108,9 @@ codePointAtFallback :: Int -> String -> Maybe CodePoint codePointAtFallback n s = Array.index (toCodePointArray s) n +-- | Returns the number of code points in the leading sequence of code points +-- | which all match the given predicate. Operates in constant space and in +-- | time linear to the length of the given string. count :: (CodePoint -> Boolean) -> String -> Int count = _count isLead isTrail unsurrogate @@ -116,14 +123,23 @@ foreign import _count -> Int +-- | Drops the given number of code points from the beginning of the given +-- | string. If the string does not have that many code points, returns the +-- | empty string. Operates in space and time linear to the length of the given +-- | string. drop :: Int -> String -> String drop n s = fromCodePointArray (Array.drop n (toCodePointArray s)) +-- | Drops the leading sequence of code points which all match the given +-- | predicate from the given string. Operates in space and time linear to the +-- | length of the given string. dropWhile :: (CodePoint -> Boolean) -> String -> String dropWhile p s = drop (count p s) s +-- | Creates a string from an array of code points. Operates in space and time +-- | linear to the length of the given array. fromCodePointArray :: Array CodePoint -> String fromCodePointArray = _fromCodePointArray singletonFallback @@ -132,31 +148,44 @@ foreign import _fromCodePointArray -> Array CodePoint -> String - +-- | Returns the number of code points preceding the first match of the given +-- | pattern in the given string. Returns Nothing when no matches are found. indexOf :: String.Pattern -> String -> Maybe Int indexOf p s = (\i -> length (String.take i s)) <$> String.indexOf p s +-- | Returns the number of code points preceding the first match of the given +-- | pattern in the given string. Pattern matches preceding the given index +-- | will be ignored. Returns Nothing when no matches are found. indexOf' :: String.Pattern -> Int -> String -> Maybe Int indexOf' p i s = let s' = drop i s in (\k -> i + length (String.take k s')) <$> String.indexOf p s' +-- | Returns the number of code points preceding the last match of the given +-- | pattern in the given string. Returns Nothing when no matches are found. lastIndexOf :: String.Pattern -> String -> Maybe Int lastIndexOf p s = (\i -> length (String.take i s)) <$> String.lastIndexOf p s +-- | Returns the number of code points preceding the first match of the given +-- | pattern in the given string. Pattern matches following the given index +-- | will be ignored. Returns Nothing when no matches are found. lastIndexOf' :: String.Pattern -> Int -> String -> Maybe Int lastIndexOf' p i s = let i' = String.length (take i s) in (\k -> length (String.take k s)) <$> String.lastIndexOf' p i' s +-- | Returns the number of code points in the given string. Operates in +-- | constant space and time linear to the length of the string. length :: String -> Int length = Array.length <<< toCodePointArray +-- | Creates a string containing just the given code point. Operates in +-- | constant space and time. singleton :: CodePoint -> String singleton = _singleton singletonFallback @@ -173,6 +202,9 @@ singletonFallback (CodePoint cp) = fromCharCode lead <> fromCharCode trail trail = (cp - 0x10000) `mod` 0x400 + 0xDC00 +-- | Returns a record with strings created from the code points on either side +-- | of the given index. If the index is not within the string, Nothing is +-- | returned. splitAt :: Int -> String -> Maybe { before :: String, after :: String } splitAt i s = let cps = toCodePointArray s in @@ -184,6 +216,10 @@ splitAt i s = } +-- | Returns a string containing the given number of code points from the +-- | beginning of the given string. If the string does not have that many code +-- | points, returns the empty string. Operates in space and time linear to the +-- | given number. take :: Int -> String -> String take = _take takeFallback @@ -193,10 +229,15 @@ takeFallback :: Int -> String -> String takeFallback n s = fromCodePointArray (Array.take n (toCodePointArray s)) +-- | Returns a string containing the leading sequence of code points which all +-- | match the given predicate from the given string. Operates in space and +-- | time linear to the given number. takeWhile :: (CodePoint -> Boolean) -> String -> String takeWhile p s = take (count p s) s +-- | Creates an array of code points from a string. Operates in space and time +-- | linear to the length of the given string. toCodePointArray :: String -> Array CodePoint toCodePointArray = _toCodePointArray toCodePointArrayFallback unsafeCodePointAt0 @@ -216,5 +257,8 @@ toCodePointArrayFallback s = unfoldr decode (fromFoldable (Char.toCharCode <$> S decode Nil = Nothing +-- | Returns a record with the first code point and the remaining code points +-- | of the given string. Returns Nothing if the string is empty. Operates in +-- | space and time linear to the length of the string. uncons :: String -> Maybe { head :: CodePoint, tail :: String } uncons s = { head: _, tail: drop 1 s } <$> codePointAt 0 s From cef521afe2f8379f20d60ad62608fe5e8c9b6c14 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Thu, 6 Jul 2017 10:10:44 -0700 Subject: [PATCH 40/48] update Data.String import warning comment --- src/Data/String/CodePoints.purs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 31b7747..93b27bc 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -33,7 +33,8 @@ import Data.List (List(Cons, Nil), fromFoldable) import Data.Maybe (Maybe(Just, Nothing)) import Data.String as String import Data.String.Unsafe as Unsafe --- WARN: This list must be updated to re-export any exports added to Data.String. That makes me sad. +-- WARN: In order for this module to be a drop-in replacement for Data.String, +-- this list must be updated to re-export any exports added to Data.String. import Data.String (Pattern(..), Replacement(..), charAt, charCodeAt, contains, fromCharArray, joinWith, localeCompare, null, replace, replaceAll, split, stripPrefix, stripSuffix, toChar, toCharArray, toLower, toUpper, trim) as StringReExports import Data.Tuple (Tuple(Tuple)) import Data.Unfoldable (unfoldr) From b38eb80e1ea9f212f889af56f6b99be00614e583 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 7 Jul 2017 02:22:08 -0700 Subject: [PATCH 41/48] refactor to avoid lists dep; better complexity adherence in fallbacks --- bower.json | 1 - src/Data/String/CodePoints.purs | 31 ++++++++++++++++++++----------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/bower.json b/bower.json index a1fa92d..5178621 100644 --- a/bower.json +++ b/bower.json @@ -22,7 +22,6 @@ "purescript-maybe": "^3.0.0", "purescript-partial": "^1.2.0", "purescript-unfoldable": "^3.0.0", - "purescript-lists": "^4.1.1", "purescript-arrays": "^4.0.1" }, "devDependencies": { diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 93b27bc..eb02577 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -29,7 +29,6 @@ import Prelude import Data.Array as Array import Data.Char as Char -import Data.List (List(Cons, Nil), fromFoldable) import Data.Maybe (Maybe(Just, Nothing)) import Data.String as String import Data.String.Unsafe as Unsafe @@ -106,7 +105,9 @@ foreign import _codePointAt -> Maybe CodePoint codePointAtFallback :: Int -> String -> Maybe CodePoint -codePointAtFallback n s = Array.index (toCodePointArray s) n +codePointAtFallback n s = case uncons s of + Just { head, tail } -> if n == 0 then Just head else codePointAtFallback (n - 1) tail + _ -> Nothing -- | Returns the number of code points in the leading sequence of code points @@ -129,7 +130,7 @@ foreign import _count -- | empty string. Operates in space and time linear to the length of the given -- | string. drop :: Int -> String -> String -drop n s = fromCodePointArray (Array.drop n (toCodePointArray s)) +drop n s = String.drop (String.length (take n s)) s -- | Drops the leading sequence of code points which all match the given @@ -227,7 +228,10 @@ take = _take takeFallback foreign import _take :: (Int -> String -> String) -> Int -> String -> String takeFallback :: Int -> String -> String -takeFallback n s = fromCodePointArray (Array.take n (toCodePointArray s)) +takeFallback n _ | n < 1 = "" +takeFallback n s = case uncons s of + Just { head, tail } -> singleton head <> takeFallback (n - 1) tail + _ -> s -- | Returns a string containing the leading sequence of code points which all @@ -249,17 +253,22 @@ foreign import _toCodePointArray -> Array CodePoint toCodePointArrayFallback :: String -> Array CodePoint -toCodePointArrayFallback s = unfoldr decode (fromFoldable (Char.toCharCode <$> String.toCharArray s)) +toCodePointArrayFallback s = unfoldr decode s where - decode :: List Int -> Maybe (Tuple CodePoint (List Int)) - decode (Cons cu0 (Cons cu1 rest)) | isLead cu0 && isTrail cu1 - = Just (Tuple (unsurrogate cu0 cu1) rest) - decode (Cons cu rest) = Just (Tuple (CodePoint cu) rest) - decode Nil = Nothing + decode :: String -> Maybe (Tuple CodePoint String) + decode s' = (\{ head, tail } -> Tuple head tail) <$> uncons s' -- | Returns a record with the first code point and the remaining code points -- | of the given string. Returns Nothing if the string is empty. Operates in -- | space and time linear to the length of the string. uncons :: String -> Maybe { head :: CodePoint, tail :: String } -uncons s = { head: _, tail: drop 1 s } <$> codePointAt 0 s +uncons s = case String.length s of + 0 -> Nothing + 1 -> Just { head: CodePoint (Unsafe.charCodeAt 0 s), tail: "" } + _ -> + let cu0 = Unsafe.charCodeAt 0 s in + let cu1 = Unsafe.charCodeAt 1 s in + if isLead cu0 && isTrail cu1 + then Just { head: unsurrogate cu0 cu1, tail: String.drop 2 s } + else Just { head: CodePoint cu0, tail: String.drop 1 s } From 4f3d71d72b5415d6ab043efd0e918e0ce17291ab Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 7 Jul 2017 02:24:30 -0700 Subject: [PATCH 42/48] remove fallback to Array.from in codePointAt JS implementation for now --- src/Data/String/CodePoints.js | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 041e275..e968c32 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -31,10 +31,6 @@ exports._codePointAt = function (fallback) { if (o.done) return Nothing; if (i === 0) return Just(unsafeCodePointAt0(o.value)); } - } else if (hasArrayFrom) { - var cps = Array.from(str); - if (index >= cps.length) return Nothing; - return Just(unsafeCodePointAt0(cps[index])); } return fallback(index)(str); }; From e3cea19e06a7e51fbb6313d217a7d0a3f5c9d7b3 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 7 Jul 2017 02:28:10 -0700 Subject: [PATCH 43/48] prefer let over where --- src/Data/String/CodePoints.purs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index eb02577..22e6930 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -79,12 +79,11 @@ foreign import _unsafeCodePointAt0 unsafeCodePointAt0Fallback :: String -> CodePoint unsafeCodePointAt0Fallback s = + let cu0 = Unsafe.charCodeAt 0 s in + let cu1 = Unsafe.charCodeAt 1 s in if isLead cu0 && isTrail cu1 then unsurrogate cu0 cu1 else CodePoint cu0 - where - cu0 = Unsafe.charCodeAt 0 s - cu1 = Unsafe.charCodeAt 1 s -- | Returns the first code point of the string after dropping the given number @@ -198,10 +197,10 @@ foreign import _singleton singletonFallback :: CodePoint -> String singletonFallback (CodePoint cp) | cp <= 0xFFFF = fromCharCode cp -singletonFallback (CodePoint cp) = fromCharCode lead <> fromCharCode trail - where - lead = ((cp - 0x10000) / 0x400) + 0xD800 - trail = (cp - 0x10000) `mod` 0x400 + 0xDC00 +singletonFallback (CodePoint cp) = + let lead = ((cp - 0x10000) / 0x400) + 0xD800 in + let trail = (cp - 0x10000) `mod` 0x400 + 0xDC00 in + fromCharCode lead <> fromCharCode trail -- | Returns a record with strings created from the code points on either side From db3eba31fd1e15994a28a359b31249b30956200f Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 7 Jul 2017 09:20:46 -0700 Subject: [PATCH 44/48] change JS implementation of count to use string iterator if possible --- src/Data/String/CodePoints.js | 27 ++++++++++----------------- src/Data/String/CodePoints.purs | 16 ++++++++++++---- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index e968c32..1c73483 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -40,29 +40,22 @@ exports._codePointAt = function (fallback) { }; }; -exports._count = function (isLead) { - return function (isTrail) { - return function (unsurrogate) { +exports._count = function (fallback) { + return function (unsafeCodePointAt0) { + if (hasStringIterator) { return function (pred) { return function (str) { - var cpCount = 0; - for (var cuCount = 0; cuCount < str.length; ++cuCount) { - var lead = str.charCodeAt(cuCount); - var cp = lead; - if (isLead(lead) && cuCount + 1 < str.length) { - var trail = str.charCodeAt(cuCount + 1); - if (isTrail(trail)) { - cp = unsurrogate(lead)(trail); - ++cuCount; - } - } + var iter = str[Symbol.iterator](); + for (var cpCount = 0; ; ++cpCount) { + var o = iter.next(); + if (o.done) return cpCount; + var cp = unsafeCodePointAt0(o.value); if (!pred(cp)) return cpCount; - ++cpCount; } - return cpCount; }; }; - }; + } + return fallback; }; }; diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 22e6930..1638d93 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -90,6 +90,7 @@ unsafeCodePointAt0Fallback s = -- | of code points from the beginning, if there is such a code point. Operates -- | in constant space and in time linear to `n`. codePointAt :: Int -> String -> Maybe CodePoint +codePointAt n _ | n < 0 = Nothing codePointAt 0 "" = Nothing codePointAt 0 s = Just (unsafeCodePointAt0 s) codePointAt n s = _codePointAt codePointAtFallback Just Nothing unsafeCodePointAt0 n s @@ -113,16 +114,23 @@ codePointAtFallback n s = case uncons s of -- | which all match the given predicate. Operates in constant space and in -- | time linear to the length of the given string. count :: (CodePoint -> Boolean) -> String -> Int -count = _count isLead isTrail unsurrogate +count = _count countFallback unsafeCodePointAt0 foreign import _count - :: (Int -> Boolean) - -> (Int -> Boolean) - -> (Int -> Int -> CodePoint) + :: ((CodePoint -> Boolean) -> String -> Int) + -> (String -> CodePoint) -> (CodePoint -> Boolean) -> String -> Int +countFallback :: (CodePoint -> Boolean) -> String -> Int +countFallback p s = countTail p s 0 + where + countTail :: (CodePoint -> Boolean) -> String -> Int -> Int + countTail p' s' accum = case uncons s' of + Just { head, tail } -> if p' head then countTail p' tail (accum + 1) else accum + _ -> accum + -- | Drops the given number of code points from the beginning of the given -- | string. If the string does not have that many code points, returns the From 3a24c8d00b40a61fed564c9a6d0a154b180d8196 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 7 Jul 2017 09:39:05 -0700 Subject: [PATCH 45/48] update comments --- src/Data/String/CodePoints.purs | 51 +++++++++++++++++---------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 1638d93..e71560a 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -32,8 +32,12 @@ import Data.Char as Char import Data.Maybe (Maybe(Just, Nothing)) import Data.String as String import Data.String.Unsafe as Unsafe --- WARN: In order for this module to be a drop-in replacement for Data.String, --- this list must be updated to re-export any exports added to Data.String. +-- WARN: If a new function is added to Data.String, a version of that function +-- should be exported from this module, which should be the same except that it +-- should operate on the code point level rather than the code unit level. If +-- the function's behaviour does not change based on whether we consider +-- strings as sequences of code points or code units, it can simply be +-- re-exported from Data.String. import Data.String (Pattern(..), Replacement(..), charAt, charCodeAt, contains, fromCharArray, joinWith, localeCompare, null, replace, replaceAll, split, stripPrefix, stripSuffix, toChar, toCharArray, toLower, toUpper, trim) as StringReExports import Data.Tuple (Tuple(Tuple)) import Data.Unfoldable (unfoldr) @@ -88,7 +92,7 @@ unsafeCodePointAt0Fallback s = -- | Returns the first code point of the string after dropping the given number -- | of code points from the beginning, if there is such a code point. Operates --- | in constant space and in time linear to `n`. +-- | in constant space and in time linear to the given index. codePointAt :: Int -> String -> Maybe CodePoint codePointAt n _ | n < 0 = Nothing codePointAt 0 "" = Nothing @@ -112,7 +116,7 @@ codePointAtFallback n s = case uncons s of -- | Returns the number of code points in the leading sequence of code points -- | which all match the given predicate. Operates in constant space and in --- | time linear to the length of the given string. +-- | time linear to the length of the string. count :: (CodePoint -> Boolean) -> String -> Int count = _count countFallback unsafeCodePointAt0 @@ -132,23 +136,22 @@ countFallback p s = countTail p s 0 _ -> accum --- | Drops the given number of code points from the beginning of the given --- | string. If the string does not have that many code points, returns the --- | empty string. Operates in space and time linear to the length of the given --- | string. +-- | Drops the given number of code points from the beginning of the string. If +-- | the string does not have that many code points, returns the empty string. +-- | Operates in space and time linear to the length of the string. drop :: Int -> String -> String drop n s = String.drop (String.length (take n s)) s -- | Drops the leading sequence of code points which all match the given --- | predicate from the given string. Operates in space and time linear to the --- | length of the given string. +-- | predicate from the string. Operates in space and time linear to the +-- | length of the string. dropWhile :: (CodePoint -> Boolean) -> String -> String dropWhile p s = drop (count p s) s -- | Creates a string from an array of code points. Operates in space and time --- | linear to the length of the given array. +-- | linear to the length of the array. fromCodePointArray :: Array CodePoint -> String fromCodePointArray = _fromCodePointArray singletonFallback @@ -158,14 +161,14 @@ foreign import _fromCodePointArray -> String -- | Returns the number of code points preceding the first match of the given --- | pattern in the given string. Returns Nothing when no matches are found. +-- | pattern in the string. Returns Nothing when no matches are found. indexOf :: String.Pattern -> String -> Maybe Int indexOf p s = (\i -> length (String.take i s)) <$> String.indexOf p s -- | Returns the number of code points preceding the first match of the given --- | pattern in the given string. Pattern matches preceding the given index --- | will be ignored. Returns Nothing when no matches are found. +-- | pattern in the string. Pattern matches preceding the given index will be +-- | ignored. Returns Nothing when no matches are found. indexOf' :: String.Pattern -> Int -> String -> Maybe Int indexOf' p i s = let s' = drop i s in @@ -173,22 +176,22 @@ indexOf' p i s = -- | Returns the number of code points preceding the last match of the given --- | pattern in the given string. Returns Nothing when no matches are found. +-- | pattern in the string. Returns Nothing when no matches are found. lastIndexOf :: String.Pattern -> String -> Maybe Int lastIndexOf p s = (\i -> length (String.take i s)) <$> String.lastIndexOf p s -- | Returns the number of code points preceding the first match of the given --- | pattern in the given string. Pattern matches following the given index --- | will be ignored. Returns Nothing when no matches are found. +-- | pattern in the string. Pattern matches following the given index will be +-- | ignored. Returns Nothing when no matches are found. lastIndexOf' :: String.Pattern -> Int -> String -> Maybe Int lastIndexOf' p i s = let i' = String.length (take i s) in (\k -> length (String.take k s)) <$> String.lastIndexOf' p i' s --- | Returns the number of code points in the given string. Operates in --- | constant space and time linear to the length of the string. +-- | Returns the number of code points in the string. Operates in constant +-- | space and in time linear to the length of the string. length :: String -> Int length = Array.length <<< toCodePointArray @@ -242,14 +245,14 @@ takeFallback n s = case uncons s of -- | Returns a string containing the leading sequence of code points which all --- | match the given predicate from the given string. Operates in space and --- | time linear to the given number. +-- | match the given predicate from the string. Operates in space and time +-- | linear to the length of the string. takeWhile :: (CodePoint -> Boolean) -> String -> String takeWhile p s = take (count p s) s -- | Creates an array of code points from a string. Operates in space and time --- | linear to the length of the given string. +-- | linear to the length of the string. toCodePointArray :: String -> Array CodePoint toCodePointArray = _toCodePointArray toCodePointArrayFallback unsafeCodePointAt0 @@ -267,8 +270,8 @@ toCodePointArrayFallback s = unfoldr decode s -- | Returns a record with the first code point and the remaining code points --- | of the given string. Returns Nothing if the string is empty. Operates in --- | space and time linear to the length of the string. +-- | of the string. Returns Nothing if the string is empty. Operates in +-- | constant space and time. uncons :: String -> Maybe { head :: CodePoint, tail :: String } uncons s = case String.length s of 0 -> Nothing From 82a502f39ba8a8d52c29291da6350dffc3736f0b Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 7 Jul 2017 22:27:36 -0700 Subject: [PATCH 46/48] pull functions out of where clauses --- src/Data/String/CodePoints.purs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index e71560a..c0290a1 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -129,11 +129,11 @@ foreign import _count countFallback :: (CodePoint -> Boolean) -> String -> Int countFallback p s = countTail p s 0 - where - countTail :: (CodePoint -> Boolean) -> String -> Int -> Int - countTail p' s' accum = case uncons s' of - Just { head, tail } -> if p' head then countTail p' tail (accum + 1) else accum - _ -> accum + +countTail :: (CodePoint -> Boolean) -> String -> Int -> Int +countTail p s accum = case uncons s of + Just { head, tail } -> if p head then countTail p tail (accum + 1) else accum + _ -> accum -- | Drops the given number of code points from the beginning of the string. If @@ -263,10 +263,10 @@ foreign import _toCodePointArray -> Array CodePoint toCodePointArrayFallback :: String -> Array CodePoint -toCodePointArrayFallback s = unfoldr decode s - where - decode :: String -> Maybe (Tuple CodePoint String) - decode s' = (\{ head, tail } -> Tuple head tail) <$> uncons s' +toCodePointArrayFallback s = unfoldr unconsButWithTuple s + +unconsButWithTuple :: String -> Maybe (Tuple CodePoint String) +unconsButWithTuple s' = (\{ head, tail } -> Tuple head tail) <$> uncons s' -- | Returns a record with the first code point and the remaining code points From 085022e290d1c42f94714723fa39b410eece3157 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Fri, 7 Jul 2017 22:31:16 -0700 Subject: [PATCH 47/48] change complexity documentation for drop{,While} and take{,While} --- src/Data/String/CodePoints.purs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index c0290a1..b4bca52 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -138,14 +138,14 @@ countTail p s accum = case uncons s of -- | Drops the given number of code points from the beginning of the string. If -- | the string does not have that many code points, returns the empty string. --- | Operates in space and time linear to the length of the string. +-- | Operates in constant space and in time linear to the given number. drop :: Int -> String -> String drop n s = String.drop (String.length (take n s)) s -- | Drops the leading sequence of code points which all match the given --- | predicate from the string. Operates in space and time linear to the --- | length of the string. +-- | predicate from the string. Operates in constant space and in time linear +-- | to the length of the string. dropWhile :: (CodePoint -> Boolean) -> String -> String dropWhile p s = drop (count p s) s @@ -230,8 +230,8 @@ splitAt i s = -- | Returns a string containing the given number of code points from the -- | beginning of the given string. If the string does not have that many code --- | points, returns the empty string. Operates in space and time linear to the --- | given number. +-- | points, returns the empty string. Operates in constant space and in time +-- | linear to the given number. take :: Int -> String -> String take = _take takeFallback @@ -245,8 +245,8 @@ takeFallback n s = case uncons s of -- | Returns a string containing the leading sequence of code points which all --- | match the given predicate from the string. Operates in space and time --- | linear to the length of the string. +-- | match the given predicate from the string. Operates in constant space and +-- | in time linear to the length of the string. takeWhile :: (CodePoint -> Boolean) -> String -> String takeWhile p s = take (count p s) s From 6edb70fd2f7fea69502025cb90882008004e79b1 Mon Sep 17 00:00:00 2001 From: Michael Ficarra Date: Sat, 8 Jul 2017 07:15:55 -0700 Subject: [PATCH 48/48] forgot about a prime --- src/Data/String/CodePoints.purs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index b4bca52..7fe51f3 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -266,7 +266,7 @@ toCodePointArrayFallback :: String -> Array CodePoint toCodePointArrayFallback s = unfoldr unconsButWithTuple s unconsButWithTuple :: String -> Maybe (Tuple CodePoint String) -unconsButWithTuple s' = (\{ head, tail } -> Tuple head tail) <$> uncons s' +unconsButWithTuple s = (\{ head, tail } -> Tuple head tail) <$> uncons s -- | Returns a record with the first code point and the remaining code points