diff --git a/System/OsString.hs b/System/OsString.hs index c11a4bdf..14b294c9 100644 --- a/System/OsString.hs +++ b/System/OsString.hs @@ -24,6 +24,8 @@ module System.OsString , encodeWith , encodeFS , osstr + , empty + , singleton , pack -- * OsString deconstruction @@ -40,6 +42,87 @@ module System.OsString -- * Word deconstruction , toChar + + -- * Basic interface + , snoc + , cons + , last + , tail + , uncons + , head + , init + , unsnoc + , null + , length + + -- * Transforming OsString + , map + , reverse + , intercalate + + -- * Reducing OsStrings (folds) + , foldl + , foldl' + , foldl1 + , foldl1' + , foldr + , foldr' + , foldr1 + , foldr1' + + -- * Special folds + , all + , any + , concat + + -- * Generating and unfolding OsStrings + , replicate + , unfoldr + , unfoldrN + + -- * Substrings + -- ** Breaking strings + , take + , takeEnd + , takeWhileEnd + , takeWhile + , drop + , dropEnd + , dropWhileEnd + , dropWhile + , break + , breakEnd + , span + , spanEnd + , splitAt + , split + , splitWith + , stripSuffix + , stripPrefix + + -- * Predicates + , isInfixOf + , isPrefixOf + , isSuffixOf + -- ** Search for arbitrary substrings + , breakSubstring + + -- * Searching OsStrings + -- ** Searching by equality + , elem + , find + , filter + , partition + + -- * Indexing OsStrings + , index + , indexMaybe + , (!?) 
+ , elemIndex + , elemIndices + , count + , findIndex + , findIndices ) where @@ -51,10 +134,73 @@ import System.OsString.Internal , encodeFS , osstr , pack + , empty + , singleton , decodeUtf , decodeWith , decodeFS , unpack + , snoc + , cons + , last + , tail + , uncons + , head + , init + , unsnoc + , null + , length + , map + , reverse + , intercalate + , foldl + , foldl' + , foldl1 + , foldl1' + , foldr + , foldr' + , foldr1 + , foldr1' + , all + , any + , concat + , replicate + , unfoldr + , unfoldrN + , take + , takeEnd + , takeWhileEnd + , takeWhile + , drop + , dropEnd + , dropWhileEnd + , dropWhile + , break + , breakEnd + , span + , spanEnd + , splitAt + , split + , splitWith + , stripSuffix + , stripPrefix + , isInfixOf + , isPrefixOf + , isSuffixOf + , breakSubstring + , elem + , find + , filter + , partition + , index + , indexMaybe + , (!?) + , elemIndex + , elemIndices + , count + , findIndex + , findIndices ) import System.OsString.Internal.Types ( OsString, OsChar ) +import Prelude () diff --git a/System/OsString/Common.hs b/System/OsString/Common.hs index 80eb69b5..ed9e45d5 100644 --- a/System/OsString/Common.hs +++ b/System/OsString/Common.hs @@ -1,6 +1,10 @@ {- HLINT ignore "Unused LANGUAGE pragma" -} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-unused-imports #-} + -- This template expects CPP definitions for: -- MODULE_NAME = Posix | Windows -- IS_WINDOWS = False | True @@ -28,6 +32,8 @@ module System.OsString.MODULE_NAME , encodeFS , fromBytes , pstr + , singleton + , empty , pack -- * String deconstruction @@ -41,6 +47,87 @@ module System.OsString.MODULE_NAME -- * Word deconstruction , toChar + + -- * Basic interface + , snoc + , cons + , last + , tail + , uncons + , head + , init + , unsnoc + , null + , length + + -- * Transforming OsString + , map + , reverse + , intercalate + + -- * Reducing OsStrings (folds) + , foldl + , 
foldl' + , foldl1 + , foldl1' + , foldr + , foldr' + , foldr1 + , foldr1' + + -- ** Special folds + , all + , any + , concat + + -- ** Generating and unfolding OsStrings + , replicate + , unfoldr + , unfoldrN + + -- * Substrings + -- ** Breaking strings + , take + , takeEnd + , takeWhileEnd + , takeWhile + , drop + , dropEnd + , dropWhileEnd + , dropWhile + , break + , breakEnd + , span + , spanEnd + , splitAt + , split + , splitWith + , stripSuffix + , stripPrefix + + -- * Predicates + , isInfixOf + , isPrefixOf + , isSuffixOf + -- ** Search for arbitrary substrings + , breakSubstring + + -- * Searching OsStrings + -- ** Searching by equality + , elem + , find + , filter + , partition + + -- * Indexing OsStrings + , index + , indexMaybe + , (!?) + , elemIndex + , elemIndices + , count + , findIndex + , findIndices ) where @@ -54,6 +141,7 @@ import System.OsString.Internal.Types ( #endif ) +import Data.Coerce import Data.Char import Control.Monad.Catch ( MonadThrow, throwM ) @@ -78,15 +166,19 @@ import System.OsPath.Encoding import System.IO ( TextEncoding, utf16le ) import GHC.IO.Encoding.UTF16 ( mkUTF16le ) -import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 -import qualified System.OsPath.Data.ByteString.Short as BS8 +import qualified System.OsPath.Data.ByteString.Short.Word16 as BSP #else import System.OsPath.Encoding import System.IO ( TextEncoding, utf8 ) import GHC.IO.Encoding.UTF8 ( mkUTF8 ) -import qualified System.OsPath.Data.ByteString.Short as BS +import qualified System.OsPath.Data.ByteString.Short as BSP #endif +import GHC.Stack (HasCallStack) +import Prelude (Bool, Int, Maybe(..), IO, String, Either(..), fmap, ($), (.), mconcat, fromEnum, fromInteger, mempty, fromIntegral, fail, (<$>), show, either, pure, const, flip) +import Data.Bifunctor ( bimap ) +import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 +import qualified System.OsPath.Data.ByteString.Short as BS8 @@ -119,7 +211,7 @@ encodeWith enc str = 
unsafePerformIO $ do r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> WindowsString <$> BS8.packCStringLen cstr evaluate $ force $ first (flip EncodingError Nothing . displayException) r #else - r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> PosixString <$> BS.packCStringLen cstr + r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> PosixString <$> BSP.packCStringLen cstr evaluate $ force $ first (flip EncodingError Nothing . displayException) r #endif @@ -186,7 +278,7 @@ decodeWith :: TextEncoding -> PLATFORM_STRING -> Either EncodingException String decodeWith unixEnc (PosixString ba) = unsafePerformIO $ do - r <- try @SomeException $ BS.useAsCStringLen ba $ \fp -> GHC.peekCStringLen unixEnc fp + r <- try @SomeException $ BSP.useAsCStringLen ba $ \fp -> GHC.peekCStringLen unixEnc fp evaluate $ force $ first (flip EncodingError Nothing . displayException) r #endif @@ -235,7 +327,7 @@ fromBytes bs = let ws = WindowsString . BS16.toShort $ bs in either throwM (const . pure $ ws) $ decodeWith ucs2le ws #else -fromBytes = pure . PosixString . BS.toShort +fromBytes = pure . PosixString . BSP.toShort #endif @@ -276,11 +368,7 @@ pstr = -- | Unpack a platform string to a list of platform words. unpack :: PLATFORM_STRING -> [PLATFORM_WORD] -#ifdef WINDOWS -unpack (WindowsString ba) = WindowsChar <$> BS16.unpack ba -#else -unpack (PosixString ba) = PosixChar <$> BS.unpack ba -#endif +unpack = coerce BSP.unpack -- | Pack a list of platform words to a platform string. @@ -289,11 +377,13 @@ unpack (PosixString ba) = PosixChar <$> BS.unpack ba -- convert from @[Char]@ to platform string is probably not what -- you want, because it will truncate unicode code points. pack :: [PLATFORM_WORD] -> PLATFORM_STRING -#ifdef WINDOWS -pack = WindowsString . BS16.pack . fmap (\(WindowsChar w) -> w) -#else -pack = PosixString . BS.pack . 
fmap (\(PosixChar w) -> w) -#endif +pack = coerce BSP.pack + +singleton :: PLATFORM_WORD -> PLATFORM_STRING +singleton = coerce BSP.singleton + +empty :: PLATFORM_STRING +empty = mempty #ifdef WINDOWS @@ -313,3 +403,545 @@ toChar (WindowsChar w) = chr $ fromIntegral w #else toChar (PosixChar w) = chr $ fromIntegral w #endif + +-- | /O(n)/ Append a byte to the end of a 'OsString' +-- +-- @since 1.4.200.0 +snoc :: PLATFORM_STRING -> PLATFORM_WORD -> PLATFORM_STRING +snoc = coerce BSP.snoc + +-- | /O(n)/ 'cons' is analogous to (:) for lists. +-- +-- @since 1.4.200.0 +cons :: PLATFORM_WORD -> PLATFORM_STRING -> PLATFORM_STRING +cons = coerce BSP.cons + + +-- | /O(1)/ Extract the last element of a OsString, which must be finite and non-empty. +-- An exception will be thrown in the case of an empty OsString. +-- +-- This is a partial function, consider using 'unsnoc' instead. +-- +-- @since 1.4.200.0 +last :: HasCallStack => PLATFORM_STRING -> PLATFORM_WORD +last = coerce BSP.last + +-- | /O(n)/ Extract the elements after the head of a OsString, which must be non-empty. +-- An exception will be thrown in the case of an empty OsString. +-- +-- This is a partial function, consider using 'uncons' instead. +-- +-- @since 1.4.200.0 +tail :: HasCallStack => PLATFORM_STRING -> PLATFORM_STRING +tail = coerce BSP.tail + +-- | /O(n)/ Extract the 'head' and 'tail' of a OsString, returning 'Nothing' +-- if it is empty. +-- +-- @since 1.4.200.0 +uncons :: PLATFORM_STRING -> Maybe (PLATFORM_WORD, PLATFORM_STRING) +uncons = coerce BSP.uncons + +-- | /O(1)/ Extract the first element of a OsString, which must be non-empty. +-- An exception will be thrown in the case of an empty OsString. +-- +-- This is a partial function, consider using 'uncons' instead. +-- +-- @since 1.4.200.0 +head :: HasCallStack => PLATFORM_STRING -> PLATFORM_WORD +head = coerce BSP.head + +-- | /O(n)/ Return all the elements of a 'OsString' except the last one. 
+-- An exception will be thrown in the case of an empty OsString. +-- +-- This is a partial function, consider using 'unsnoc' instead. +-- +-- @since 1.4.200.0 +init :: HasCallStack => PLATFORM_STRING -> PLATFORM_STRING +init = coerce BSP.init + +-- | /O(n)/ Extract the 'init' and 'last' of a OsString, returning 'Nothing' +-- if it is empty. +-- +-- @since 1.4.200.0 +unsnoc :: PLATFORM_STRING -> Maybe (PLATFORM_STRING, PLATFORM_WORD) +unsnoc = coerce BSP.unsnoc + +-- | /O(1)/ Test whether a 'OsString' is empty. +-- +-- @since 1.4.200.0 +null :: PLATFORM_STRING -> Bool +null = coerce BSP.null + +-- | /O(1)/ The length of a 'OsString'. +-- +-- @since 1.4.200.0 +length :: PLATFORM_STRING -> Int +length = coerce BSP.length + +-- | /O(n)/ 'map' @f xs@ is the OsString obtained by applying @f@ to each +-- element of @xs@. +-- +-- @since 1.4.200.0 +map :: (PLATFORM_WORD -> PLATFORM_WORD) -> PLATFORM_STRING -> PLATFORM_STRING +map = coerce BSP.map + +-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. +-- +-- @since 1.4.200.0 +reverse :: PLATFORM_STRING -> PLATFORM_STRING +reverse = coerce BSP.reverse + +-- | /O(n)/ The 'intercalate' function takes a 'OsString' and a list of +-- 'OsString's and concatenates the list after interspersing the first +-- argument between each element of the list. +-- +-- @since 1.4.200.0 +intercalate :: PLATFORM_STRING -> [PLATFORM_STRING] -> PLATFORM_STRING +intercalate = coerce BSP.intercalate + +-- | 'foldl', applied to a binary operator, a starting value (typically +-- the left-identity of the operator), and a OsString, reduces the +-- OsString using the binary operator, from left to right. +-- +-- @since 1.4.200.0 +foldl :: forall a. (a -> PLATFORM_WORD -> a) -> a -> PLATFORM_STRING -> a +foldl = coerce (BSP.foldl @a) + +-- | 'foldl'' is like 'foldl', but strict in the accumulator. +-- +-- @since 1.4.200.0 +foldl' + :: forall a. 
(a -> PLATFORM_WORD -> a) -> a -> PLATFORM_STRING -> a +foldl' = coerce (BSP.foldl' @a) + +-- | 'foldl1' is a variant of 'foldl' that has no starting value +-- argument, and thus must be applied to non-empty 'OsString's. +-- An exception will be thrown in the case of an empty OsString. +-- +-- @since 1.4.200.0 +foldl1 :: (PLATFORM_WORD -> PLATFORM_WORD -> PLATFORM_WORD) -> PLATFORM_STRING -> PLATFORM_WORD +foldl1 = coerce BSP.foldl1 + +-- | 'foldl1'' is like 'foldl1', but strict in the accumulator. +-- An exception will be thrown in the case of an empty OsString. +-- +-- @since 1.4.200.0 +foldl1' + :: (PLATFORM_WORD -> PLATFORM_WORD -> PLATFORM_WORD) -> PLATFORM_STRING -> PLATFORM_WORD +foldl1' = coerce BSP.foldl1' + +-- | 'foldr', applied to a binary operator, a starting value +-- (typically the right-identity of the operator), and a OsString, +-- reduces the OsString using the binary operator, from right to left. +-- +-- @since 1.4.200.0 +foldr :: forall a. (PLATFORM_WORD -> a -> a) -> a -> PLATFORM_STRING -> a +foldr = coerce (BSP.foldr @a) + +-- | 'foldr'' is like 'foldr', but strict in the accumulator. +-- +-- @since 1.4.200.0 +foldr' + :: forall a. (PLATFORM_WORD -> a -> a) -> a -> PLATFORM_STRING -> a +foldr' = coerce (BSP.foldr' @a) + +-- | 'foldr1' is a variant of 'foldr' that has no starting value argument, +-- and thus must be applied to non-empty 'OsString's +-- An exception will be thrown in the case of an empty OsString. +-- +-- @since 1.4.200.0 +foldr1 :: (PLATFORM_WORD -> PLATFORM_WORD -> PLATFORM_WORD) -> PLATFORM_STRING -> PLATFORM_WORD +foldr1 = coerce BSP.foldr1 + +-- | 'foldr1'' is a variant of 'foldr1', but is strict in the +-- accumulator. +-- +-- @since 1.4.200.0 +foldr1' + :: (PLATFORM_WORD -> PLATFORM_WORD -> PLATFORM_WORD) -> PLATFORM_STRING -> PLATFORM_WORD +foldr1' = coerce BSP.foldr1' + +-- | /O(n)/ Applied to a predicate and a 'OsString', 'all' determines +-- if all elements of the 'OsString' satisfy the predicate. 
+-- +-- @since 1.4.200.0 +all :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> Bool +all = coerce BSP.all + +-- | /O(n)/ Applied to a predicate and a 'OsString', 'any' determines if +-- any element of the 'OsString' satisfies the predicate. +-- +-- @since 1.4.200.0 +any :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> Bool +any = coerce BSP.any + +-- | /O(n)/ Concatenate a list of OsStrings. +-- +-- @since 1.4.200.0 +concat :: [PLATFORM_STRING] -> PLATFORM_STRING +concat = mconcat + +-- | /O(n)/ 'replicate' @n x@ is a OsString of length @n@ with @x@ +-- the value of every element. The following holds: +-- +-- > replicate w c = fst (unfoldrN w (\u -> Just (u,u)) c) +-- +-- @since 1.4.200.0 +replicate :: Int -> PLATFORM_WORD -> PLATFORM_STRING +replicate = coerce BSP.replicate + +-- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' +-- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a +-- OsString from a seed value. The function takes the element and +-- returns 'Nothing' if it is done producing the OsString or returns +-- 'Just' @(a,b)@, in which case, @a@ is the next element in the string, +-- and @b@ is the seed value for further production. +-- +-- This function is not efficient/safe. It will build an intermediate list of elements +-- and run the generator until it returns 'Nothing', otherwise recurse infinitely, +-- then finally create a 'OsString'. +-- +-- If you know the maximum length, consider using 'unfoldrN'. +-- +-- Examples: +-- +-- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 +-- > == pack [0, 1, 2, 3, 4, 5] +-- +-- @since 1.4.200.0 +unfoldr :: forall a. (a -> Maybe (PLATFORM_WORD, a)) -> a -> PLATFORM_STRING +unfoldr = coerce (BSP.unfoldr @a) + +-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a OsString from a seed +-- value. However, the length of the result is limited by the first +-- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' +-- when the maximum length of the result is known. 
+-- +-- The following equation relates 'unfoldrN' and 'unfoldr': +-- +-- > fst (unfoldrN n f s) == take n (unfoldr f s) +-- +-- @since 1.4.200.0 +unfoldrN :: forall a. Int -> (a -> Maybe (PLATFORM_WORD, a)) -> a -> (PLATFORM_STRING, Maybe a) +unfoldrN = coerce (BSP.unfoldrN @a) + +-- | /O(n)/ 'take' @n@, applied to a OsString @xs@, returns the prefix +-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. +-- +-- @since 1.4.200.0 +take :: Int -> PLATFORM_STRING -> PLATFORM_STRING +take = coerce BSP.take + +-- | /O(n)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@. +-- Takes @n@ elements from end of bytestring. +-- +-- >>> takeEnd 3 "abcdefg" +-- "efg" +-- >>> takeEnd 0 "abcdefg" +-- "" +-- >>> takeEnd 4 "abc" +-- "abc" +-- +-- @since 1.4.200.0 +takeEnd :: Int -> PLATFORM_STRING -> PLATFORM_STRING +takeEnd = coerce BSP.takeEnd + +-- | Returns the longest (possibly empty) suffix of elements +-- satisfying the predicate. +-- +-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. +-- +-- @since 1.4.200.0 +takeWhileEnd :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> PLATFORM_STRING +takeWhileEnd = coerce BSP.takeWhileEnd + +-- | Similar to 'Prelude.takeWhile', +-- returns the longest (possibly empty) prefix of elements +-- satisfying the predicate. +-- +-- @since 1.4.200.0 +takeWhile :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> PLATFORM_STRING +takeWhile = coerce BSP.takeWhile + +-- | /O(n)/ 'drop' @n@ @xs@ returns the suffix of @xs@ after the first n elements, or 'empty' if @n > 'length' xs@. +-- +-- @since 1.4.200.0 +drop :: Int -> PLATFORM_STRING -> PLATFORM_STRING +drop = coerce BSP.drop + +-- | /O(n)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@. +-- Drops @n@ elements from end of bytestring. 
+-- +-- >>> dropEnd 3 "abcdefg" +-- "abcd" +-- >>> dropEnd 0 "abcdefg" +-- "abcdefg" +-- >>> dropEnd 4 "abc" +-- "" +-- +-- @since 1.4.200.0 +dropEnd :: Int -> PLATFORM_STRING -> PLATFORM_STRING +dropEnd = coerce BSP.dropEnd + +-- | Similar to 'Prelude.dropWhile', +-- drops the longest (possibly empty) prefix of elements +-- satisfying the predicate and returns the remainder. +-- +-- @since 1.4.200.0 +dropWhile :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> PLATFORM_STRING +dropWhile = coerce BSP.dropWhile + +-- | Similar to 'Prelude.dropWhileEnd', +-- drops the longest (possibly empty) suffix of elements +-- satisfying the predicate and returns the remainder. +-- +-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. +-- +-- @since 1.4.200.0 +dropWhileEnd :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> PLATFORM_STRING +dropWhileEnd = coerce BSP.dropWhileEnd + +-- | Returns the longest (possibly empty) suffix of elements which __do not__ +-- satisfy the predicate and the remainder of the string. +-- +-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@. +-- +-- @since 1.4.200.0 +breakEnd :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) +breakEnd = coerce BSP.breakEnd + +-- | Similar to 'Prelude.break', +-- returns the longest (possibly empty) prefix of elements which __do not__ +-- satisfy the predicate and the remainder of the string. +-- +-- 'break' @p@ is equivalent to @'span' (not . p)@ and to @('takeWhile' (not . p) &&& 'dropWhile' (not . p))@. +-- +-- @since 1.4.200.0 +break :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) +break = coerce BSP.break + +-- | Similar to 'Prelude.span', +-- returns the longest (possibly empty) prefix of elements +-- satisfying the predicate and the remainder of the string. +-- +-- 'span' @p@ is equivalent to @'break' (not . 
p)@ and to @('takeWhile' p &&& 'dropWhile' p)@. +-- +-- @since 1.4.200.0 +span :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) +span = coerce BSP.span + +-- | Returns the longest (possibly empty) suffix of elements +-- satisfying the predicate and the remainder of the string. +-- +-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('dropWhileEnd' p &&& 'takeWhileEnd' p)@. +-- +-- We have +-- +-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z") +-- +-- and +-- +-- > spanEnd (not . isSpace) sbs +-- > == +-- > let (x, y) = span (not . isSpace) (reverse sbs) in (reverse y, reverse x) +-- +-- @since 1.4.200.0 +spanEnd :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) +spanEnd = coerce BSP.spanEnd + +-- | /O(n)/ 'splitAt' @n sbs@ is equivalent to @('take' n sbs, 'drop' n sbs)@. +-- +-- @since 1.4.200.0 +splitAt :: Int -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) +splitAt = coerce BSP.splitAt + +-- | /O(n)/ Break a 'OsString' into pieces separated by the byte +-- argument, consuming the delimiter. I.e. +-- +-- > split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10 +-- > split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97 +-- > split 120 "x" == ["",""] -- fromEnum 'x' == 120 +-- > split undefined "" == [] -- and not [""] +-- +-- and +-- +-- > intercalate (singleton c) . split c == id +-- > split == splitWith . (==) +-- +-- @since 1.4.200.0 +split :: PLATFORM_WORD -> PLATFORM_STRING -> [PLATFORM_STRING] +split = coerce BSP.split + +-- | /O(n)/ Splits a 'OsString' into components delimited by +-- separators, where the predicate returns True for a separator element. +-- The resulting components do not contain the separators. Two adjacent +-- separators result in an empty component in the output. eg. 
+-- +-- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97 +-- > splitWith undefined "" == [] -- and not [""] +-- +-- @since 1.4.200.0 +splitWith :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> [PLATFORM_STRING] +splitWith = coerce BSP.splitWith + +-- | /O(n)/ The 'stripSuffix' function takes two OsStrings and returns 'Just' +-- the remainder of the second iff the first is its suffix, and otherwise +-- 'Nothing'. +-- +-- @since 1.4.200.0 +stripSuffix :: PLATFORM_STRING -> PLATFORM_STRING -> Maybe PLATFORM_STRING +stripSuffix = coerce BSP.stripSuffix + +-- | /O(n)/ The 'stripPrefix' function takes two OsStrings and returns 'Just' +-- the remainder of the second iff the first is its prefix, and otherwise +-- 'Nothing'. +-- +-- @since 1.4.200.0 +stripPrefix :: PLATFORM_STRING -> PLATFORM_STRING -> Maybe PLATFORM_STRING +stripPrefix = coerce BSP.stripPrefix + + +-- | Check whether one string is a substring of another. +-- +-- @since 1.4.200.0 +isInfixOf :: PLATFORM_STRING -> PLATFORM_STRING -> Bool +isInfixOf = coerce BSP.isInfixOf + +-- | /O(n)/ The 'isPrefixOf' function takes two OsStrings and returns 'True' iff the first is a prefix of the second. +-- +-- @since 1.4.200.0 +isPrefixOf :: PLATFORM_STRING -> PLATFORM_STRING -> Bool +isPrefixOf = coerce BSP.isPrefixOf + +-- | /O(n)/ The 'isSuffixOf' function takes two OsStrings and returns 'True' +-- iff the first is a suffix of the second. +-- +-- The following holds: +-- +-- > isSuffixOf x y == reverse x `isPrefixOf` reverse y +-- +-- @since 1.4.200.0 +isSuffixOf :: PLATFORM_STRING -> PLATFORM_STRING -> Bool +isSuffixOf = coerce BSP.isSuffixOf + + +-- | Break a string on a substring, returning a pair of the part of the +-- string prior to the match, and the rest of the string. 
+-- +-- The following relationships hold: +-- +-- > break (== c) l == breakSubstring (singleton c) l +-- +-- For example, to tokenise a string, dropping delimiters: +-- +-- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t) +-- > where (h,t) = breakSubstring x y +-- +-- To skip to the first occurrence of a string: +-- +-- > snd (breakSubstring x y) +-- +-- To take the parts of a string before a delimiter: +-- +-- > fst (breakSubstring x y) +-- +-- Note that calling @breakSubstring x@ does some preprocessing work, so +-- you should avoid unnecessarily duplicating breakSubstring calls with the same +-- pattern. +-- +-- @since 1.4.200.0 +breakSubstring :: PLATFORM_STRING -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) +breakSubstring = coerce BSP.breakSubstring + +-- | /O(n)/ 'elem' is the 'OsString' membership predicate. +-- +-- @since 1.4.200.0 +elem :: PLATFORM_WORD -> PLATFORM_STRING -> Bool +elem = coerce BSP.elem + +-- | /O(n)/ The 'find' function takes a predicate and a OsString, +-- and returns the first element matching the predicate, or 'Nothing' +-- if there is no such element. +-- +-- > find f p = case findIndex f p of Just n -> Just (index p n) ; _ -> Nothing +-- +-- @since 1.4.200.0 +find :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> Maybe PLATFORM_WORD +find = coerce BSP.find + +-- | /O(n)/ 'filter', applied to a predicate and a OsString, +-- returns a OsString containing those characters that satisfy the +-- predicate. +-- +-- @since 1.4.200.0 +filter :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> PLATFORM_STRING +filter = coerce BSP.filter + +-- | /O(n)/ The 'partition' function takes a predicate and a OsString and returns +-- the pair of OsStrings with elements which do and do not satisfy the +-- predicate, respectively; i.e., +-- +-- > partition p sbs == (filter p sbs, filter (not . 
p) sbs) +-- +-- @since 1.4.200.0 +partition :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) +partition = coerce BSP.partition + +-- | /O(1)/ 'OsString' index (subscript) operator, starting from 0. +-- +-- @since 1.4.200.0 +index :: HasCallStack => PLATFORM_STRING -> Int -> PLATFORM_WORD +index = coerce BSP.index + +-- | /O(1)/ 'OsString' index, starting from 0, that returns 'Just' if: +-- +-- > 0 <= n < length bs +-- +-- @since 1.4.200.0 +indexMaybe :: PLATFORM_STRING -> Int -> Maybe PLATFORM_WORD +indexMaybe = coerce BSP.indexMaybe + +-- | /O(1)/ 'OsString' index, starting from 0, that returns 'Just' if: +-- +-- > 0 <= n < length bs +-- +-- @since 1.4.200.0 +(!?) :: PLATFORM_STRING -> Int -> Maybe PLATFORM_WORD +(!?) = indexMaybe + +-- | /O(n)/ The 'elemIndex' function returns the index of the first +-- element in the given 'OsString' which is equal to the query +-- element, or 'Nothing' if there is no such element. +-- +-- @since 1.4.200.0 +elemIndex :: PLATFORM_WORD -> PLATFORM_STRING -> Maybe Int +elemIndex = coerce BSP.elemIndex + +-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning +-- the indices of all elements equal to the query element, in ascending order. +-- +-- @since 1.4.200.0 +elemIndices :: PLATFORM_WORD -> PLATFORM_STRING -> [Int] +elemIndices = coerce BSP.elemIndices + +-- | count returns the number of times its argument appears in the OsString +-- +-- @since 1.4.200.0 +count :: PLATFORM_WORD -> PLATFORM_STRING -> Int +count = coerce BSP.count + +-- | /O(n)/ The 'findIndex' function takes a predicate and a 'OsString' and +-- returns the index of the first element in the OsString +-- satisfying the predicate. +-- +-- @since 1.4.200.0 +findIndex :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> Maybe Int +findIndex = coerce BSP.findIndex + +-- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the +-- indices of all elements satisfying the predicate, in ascending order. 
+-- +-- @since 1.4.200.0 +findIndices :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> [Int] +findIndices = coerce BSP.findIndices diff --git a/System/OsString/Internal.hs b/System/OsString/Internal.hs index f72fdcb7..c0b90ec3 100644 --- a/System/OsString/Internal.hs +++ b/System/OsString/Internal.hs @@ -1,6 +1,8 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE UnliftedFFITypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} module System.OsString.Internal where @@ -27,6 +29,8 @@ import qualified System.OsString.Windows as PF import GHC.IO.Encoding.UTF8 ( mkUTF8 ) import qualified System.OsString.Posix as PF #endif +import GHC.Stack (HasCallStack) +import Data.Coerce (coerce) @@ -148,7 +152,7 @@ osstr = -- | Unpack an 'OsString' to a list of 'OsChar'. unpack :: OsString -> [OsChar] -unpack (OsString x) = OsChar <$> PF.unpack x +unpack = coerce PF.unpack -- | Pack a list of 'OsChar' to an 'OsString' @@ -157,12 +161,18 @@ unpack (OsString x) = OsChar <$> PF.unpack x -- convert from @[Char]@ to 'OsString' is probably not what -- you want, because it will truncate unicode code points. pack :: [OsChar] -> OsString -pack = OsString . PF.pack . fmap (\(OsChar x) -> x) +pack = coerce PF.pack + +empty :: OsString +empty = mempty + +singleton :: OsChar -> OsString +singleton = coerce PF.singleton -- | Truncates on unix to 1 and on Windows to 2 octets. unsafeFromChar :: Char -> OsChar -unsafeFromChar = OsChar . PF.unsafeFromChar +unsafeFromChar = coerce PF.unsafeFromChar -- | Converts back to a unicode codepoint (total). toChar :: OsChar -> Char @@ -172,3 +182,540 @@ toChar (OsChar (WindowsChar w)) = chr $ fromIntegral w toChar (OsChar (PosixChar w)) = chr $ fromIntegral w #endif +-- | /O(n)/ Append a byte to the end of a 'OsString' +-- +-- @since 1.4.200.0 +snoc :: OsString -> OsChar -> OsString +snoc = coerce PF.snoc + +-- | /O(n)/ 'cons' is analogous to (:) for lists. 
+-- +-- @since 1.4.200.0 +cons :: OsChar -> OsString -> OsString +cons = coerce PF.cons + +-- | /O(1)/ Extract the last element of a OsString, which must be finite and non-empty. +-- An exception will be thrown in the case of an empty OsString. +-- +-- This is a partial function, consider using 'unsnoc' instead. +-- +-- @since 1.4.200.0 +last :: HasCallStack => OsString -> OsChar +last = coerce PF.last + +-- | /O(n)/ Extract the elements after the head of a OsString, which must be non-empty. +-- An exception will be thrown in the case of an empty OsString. +-- +-- This is a partial function, consider using 'uncons' instead. +-- +-- @since 1.4.200.0 +tail :: HasCallStack => OsString -> OsString +tail = coerce PF.tail + +-- | /O(n)/ Extract the 'head' and 'tail' of a OsString, returning 'Nothing' +-- if it is empty. +-- +-- @since 1.4.200.0 +uncons :: OsString -> Maybe (OsChar, OsString) +uncons = coerce PF.uncons + +-- | /O(1)/ Extract the first element of a OsString, which must be non-empty. +-- An exception will be thrown in the case of an empty OsString. +-- +-- This is a partial function, consider using 'uncons' instead. +-- +-- @since 1.4.200.0 +head :: HasCallStack => OsString -> OsChar +head = coerce PF.head + +-- | /O(n)/ Return all the elements of a 'OsString' except the last one. +-- An exception will be thrown in the case of an empty OsString. +-- +-- This is a partial function, consider using 'unsnoc' instead. +-- +-- @since 1.4.200.0 +init :: HasCallStack => OsString -> OsString +init = coerce PF.init + +-- | /O(n)/ Extract the 'init' and 'last' of a OsString, returning 'Nothing' +-- if it is empty. +-- +-- @since 1.4.200.0 +unsnoc :: OsString -> Maybe (OsString, OsChar) +unsnoc = coerce PF.unsnoc + +-- | /O(1)/ Test whether a 'OsString' is empty. +-- +-- @since 1.4.200.0 +null :: OsString -> Bool +null = coerce PF.null + +-- | /O(1)/ The length of a 'OsString'. 
+-- +-- @since 1.4.200.0 +length :: OsString -> Int +length = coerce PF.length + +-- | /O(n)/ 'map' @f xs@ is the OsString obtained by applying @f@ to each +-- element of @xs@. +-- +-- @since 1.4.200.0 +map :: (OsChar -> OsChar) -> OsString -> OsString +map = coerce PF.map + +-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. +-- +-- @since 1.4.200.0 +reverse :: OsString -> OsString +reverse = coerce PF.reverse + +-- | /O(n)/ The 'intercalate' function takes a 'OsString' and a list of +-- 'OsString's and concatenates the list after interspersing the first +-- argument between each element of the list. +-- +-- @since 1.4.200.0 +intercalate :: OsString -> [OsString] -> OsString +intercalate = coerce PF.intercalate + +-- | 'foldl', applied to a binary operator, a starting value (typically +-- the left-identity of the operator), and a OsString, reduces the +-- OsString using the binary operator, from left to right. +-- +-- @since 1.4.200.0 +foldl :: forall a. (a -> OsChar -> a) -> a -> OsString -> a +foldl = coerce (PF.foldl @a) + +-- | 'foldl'' is like 'foldl', but strict in the accumulator. +-- +-- @since 1.4.200.0 +foldl' :: forall a. (a -> OsChar -> a) -> a -> OsString -> a +foldl' = coerce (PF.foldl' @a) + +-- | 'foldl1' is a variant of 'foldl' that has no starting value +-- argument, and thus must be applied to non-empty 'OsString's. +-- An exception will be thrown in the case of an empty OsString. +-- +-- @since 1.4.200.0 +foldl1 :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar +foldl1 = coerce PF.foldl1 + +-- | 'foldl1'' is like 'foldl1', but strict in the accumulator. +-- An exception will be thrown in the case of an empty OsString. 
+-- +-- @since 1.4.200.0 +foldl1' :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar +foldl1' = coerce PF.foldl1' + + +-- | 'foldr', applied to a binary operator, a starting value +-- (typically the right-identity of the operator), and a OsString, +-- reduces the OsString using the binary operator, from right to left. +-- +-- @since 1.4.200.0 +foldr :: forall a. (OsChar -> a -> a) -> a -> OsString -> a +foldr = coerce (PF.foldr @a) + +-- | 'foldr'' is like 'foldr', but strict in the accumulator. +-- +-- @since 1.4.200.0 +foldr' :: forall a. (OsChar -> a -> a) -> a -> OsString -> a +foldr' = coerce (PF.foldr' @a) + +-- | 'foldr1' is a variant of 'foldr' that has no starting value argument, +-- and thus must be applied to non-empty 'OsString's. +-- An exception will be thrown in the case of an empty OsString. +-- +-- @since 1.4.200.0 +foldr1 :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar +foldr1 = coerce PF.foldr1 + +-- | 'foldr1'' is a variant of 'foldr1', but is strict in the +-- accumulator. +-- +-- @since 1.4.200.0 +foldr1' :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar +foldr1' = coerce PF.foldr1' + +-- | /O(n)/ Applied to a predicate and a 'OsString', 'all' determines +-- if all elements of the 'OsString' satisfy the predicate. +-- +-- @since 1.4.200.0 +all :: (OsChar -> Bool) -> OsString -> Bool +all = coerce PF.all + +-- | /O(n)/ Applied to a predicate and a 'OsString', 'any' determines if +-- any element of the 'OsString' satisfies the predicate. +-- +-- @since 1.4.200.0 +any :: (OsChar -> Bool) -> OsString -> Bool +any = coerce PF.any + +-- | /O(n)/ Concatenate a list of OsStrings. +-- +-- @since 1.4.200.0 +concat :: [OsString] -> OsString +concat = mconcat + +-- | /O(n)/ 'replicate' @n x@ is a OsString of length @n@ with @x@ +-- the value of every element. 
The following holds: +-- +-- > replicate w c = fst (unfoldrN w (\u -> Just (u,u)) c) +-- +-- @since 1.4.200.0 +replicate :: Int -> OsChar -> OsString +replicate = coerce PF.replicate + +-- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' +-- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a +-- OsString from a seed value. The function takes the element and +-- returns 'Nothing' if it is done producing the OsString or returns +-- 'Just' @(a,b)@, in which case, @a@ is the next element in the string, +-- and @b@ is the seed value for further production. +-- +-- This function is not efficient/safe. It will build an intermediate list of elements +-- and run the generator until it returns `Nothing`, otherwise recurse infinitely, +-- then finally create a 'OsString'. +-- +-- If you know the maximum length, consider using 'unfoldrN'. +-- +-- Examples: +-- +-- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 +-- > == pack [0, 1, 2, 3, 4, 5] +-- +-- @since 1.4.200.0 +unfoldr :: forall a. (a -> Maybe (OsChar, a)) -> a -> OsString +unfoldr = coerce (PF.unfoldr @a) + +-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a OsString from a seed +-- value. However, the length of the result is limited by the first +-- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' +-- when the maximum length of the result is known. +-- +-- The following equation relates 'unfoldrN' and 'unfoldr': +-- +-- > fst (unfoldrN n f s) == take n (unfoldr f s) +-- +-- @since 1.4.200.0 +unfoldrN :: forall a. Int -> (a -> Maybe (OsChar, a)) -> a -> (OsString, Maybe a) +unfoldrN = coerce (PF.unfoldrN @a) + +-- | /O(n)/ 'take' @n@, applied to a OsString @xs@, returns the prefix +-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. +-- +-- @since 1.4.200.0 +take :: Int -> OsString -> OsString +take = coerce PF.take + +-- | /O(n)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@. +-- Takes @n@ elements from end of the 'OsString'.
+-- +-- >>> takeEnd 3 "abcdefg" +-- "efg" +-- >>> takeEnd 0 "abcdefg" +-- "" +-- >>> takeEnd 4 "abc" +-- "abc" +-- +-- @since 1.4.200.0 +takeEnd :: Int -> OsString -> OsString +takeEnd = coerce PF.takeEnd + +-- | Returns the longest (possibly empty) suffix of elements +-- satisfying the predicate. +-- +-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. +-- +-- @since 1.4.200.0 +takeWhileEnd :: (OsChar -> Bool) -> OsString -> OsString +takeWhileEnd = coerce PF.takeWhileEnd + +-- | Similar to 'Prelude.takeWhile', +-- returns the longest (possibly empty) prefix of elements +-- satisfying the predicate. +-- +-- @since 1.4.200.0 +takeWhile :: (OsChar -> Bool) -> OsString -> OsString +takeWhile = coerce PF.takeWhile + +-- | /O(n)/ 'drop' @n@ @xs@ returns the suffix of @xs@ after the first n elements, or 'empty' if @n > 'length' xs@. +-- +-- @since 1.4.200.0 +drop :: Int -> OsString -> OsString +drop = coerce PF.drop + +-- | /O(n)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@. +-- Drops @n@ elements from end of bytestring. +-- +-- >>> dropEnd 3 "abcdefg" +-- "abcd" +-- >>> dropEnd 0 "abcdefg" +-- "abcdefg" +-- >>> dropEnd 4 "abc" +-- "" +-- +-- @since 1.4.200.0 +dropEnd :: Int -> OsString -> OsString +dropEnd = coerce PF.dropEnd + +-- | Similar to 'Prelude.dropWhile', +-- drops the longest (possibly empty) prefix of elements +-- satisfying the predicate and returns the remainder. +-- +-- @since 1.4.200.0 +dropWhile :: (OsChar -> Bool) -> OsString -> OsString +dropWhile = coerce PF.dropWhile + +-- | Similar to 'Prelude.dropWhileEnd', +-- drops the longest (possibly empty) suffix of elements +-- satisfying the predicate and returns the remainder. +-- +-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. 
+-- +-- @since 1.4.200.0 +dropWhileEnd :: (OsChar -> Bool) -> OsString -> OsString +dropWhileEnd = coerce PF.dropWhileEnd + +-- | Returns the longest (possibly empty) suffix of elements which __do not__ +-- satisfy the predicate and the remainder of the string. +-- +-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@. +-- +-- @since 1.4.200.0 +breakEnd :: (OsChar -> Bool) -> OsString -> (OsString, OsString) +breakEnd = coerce PF.breakEnd + +-- | Similar to 'Prelude.break', +-- returns the longest (possibly empty) prefix of elements which __do not__ +-- satisfy the predicate and the remainder of the string. +-- +-- 'break' @p@ is equivalent to @'span' (not . p)@ and to @('takeWhile' (not . p) &&& 'dropWhile' (not . p))@. +-- +-- @since 1.4.200.0 +break :: (OsChar -> Bool) -> OsString -> (OsString, OsString) +break = coerce PF.break + +-- | Similar to 'Prelude.span', +-- returns the longest (possibly empty) prefix of elements +-- satisfying the predicate and the remainder of the string. +-- +-- 'span' @p@ is equivalent to @'break' (not . p)@ and to @('takeWhile' p &&& 'dropWhile' p)@. +-- +-- @since 1.4.200.0 +span :: (OsChar -> Bool) -> OsString -> (OsString, OsString) +span = coerce PF.span + +-- | Returns the longest (possibly empty) suffix of elements +-- satisfying the predicate and the remainder of the string. +-- +-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('takeWhileEnd' p &&& 'dropWhileEnd' p)@. +-- +-- We have +-- +-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z") +-- +-- and +-- +-- > spanEnd (not . isSpace) sbs +-- > == +-- > let (x, y) = span (not . isSpace) (reverse sbs) in (reverse y, reverse x) +-- +-- @since 1.4.200.0 +spanEnd :: (OsChar -> Bool) -> OsString -> (OsString, OsString) +spanEnd = coerce PF.spanEnd + +-- | /O(n)/ 'splitAt' @n sbs@ is equivalent to @('take' n sbs, 'drop' n sbs)@. 
+-- +-- @since 1.4.200.0 +splitAt :: Int -> OsString -> (OsString, OsString) +splitAt = coerce PF.splitAt + +-- | /O(n)/ Break a 'OsString' into pieces separated by the element +-- argument, consuming the delimiter. I.e. +-- +-- > split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10 +-- > split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97 +-- > split 120 "x" == ["",""] -- fromEnum 'x' == 120 +-- > split undefined "" == [] -- and not [""] +-- +-- and +-- +-- > intercalate (singleton c) . split c == id +-- > split == splitWith . (==) +-- +-- @since 1.4.200.0 +split :: OsChar -> OsString -> [OsString] +split = coerce PF.split + +-- | /O(n)/ Splits a 'OsString' into components delimited by +-- separators, where the predicate returns True for a separator element. +-- The resulting components do not contain the separators. Two adjacent +-- separators result in an empty component in the output. eg. +-- +-- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97 +-- > splitWith undefined "" == [] -- and not [""] +-- +-- @since 1.4.200.0 +splitWith :: (OsChar -> Bool) -> OsString -> [OsString] +splitWith = coerce PF.splitWith + +-- | /O(n)/ The 'stripSuffix' function takes two OsStrings and returns 'Just' +-- the remainder of the second iff the first is its suffix, and otherwise +-- 'Nothing'. +-- +-- @since 1.4.200.0 +stripSuffix :: OsString -> OsString -> Maybe OsString +stripSuffix = coerce PF.stripSuffix + +-- | /O(n)/ The 'stripPrefix' function takes two OsStrings and returns 'Just' +-- the remainder of the second iff the first is its prefix, and otherwise +-- 'Nothing'. +-- +-- @since 1.4.200.0 +stripPrefix :: OsString -> OsString -> Maybe OsString +stripPrefix = coerce PF.stripPrefix + + +-- | Check whether one string is a substring of another.
+-- +-- @since 1.4.200.0 +isInfixOf :: OsString -> OsString -> Bool +isInfixOf = coerce PF.isInfixOf + +-- | /O(n)/ The 'isPrefixOf' function takes two OsStrings and returns 'True' iff the first is a prefix of the second. +-- +-- @since 1.4.200.0 +isPrefixOf :: OsString -> OsString -> Bool +isPrefixOf = coerce PF.isPrefixOf + +-- | /O(n)/ The 'isSuffixOf' function takes two OsStrings and returns 'True' +-- iff the first is a suffix of the second. +-- +-- The following holds: +-- +-- > isSuffixOf x y == reverse x `isPrefixOf` reverse y +-- +-- @since 1.4.200.0 +isSuffixOf :: OsString -> OsString -> Bool +isSuffixOf = coerce PF.isSuffixOf + +-- | Break a string on a substring, returning a pair of the part of the +-- string prior to the match, and the rest of the string. +-- +-- The following relationships hold: +-- +-- > break (== c) l == breakSubstring (singleton c) l +-- +-- For example, to tokenise a string, dropping delimiters: +-- +-- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t) +-- > where (h,t) = breakSubstring x y +-- +-- To skip to the first occurrence of a string: +-- +-- > snd (breakSubstring x y) +-- +-- To take the parts of a string before a delimiter: +-- +-- > fst (breakSubstring x y) +-- +-- Note that calling `breakSubstring x` does some preprocessing work, so +-- you should avoid unnecessarily duplicating breakSubstring calls with the same +-- pattern. +-- +-- @since 1.4.200.0 +breakSubstring :: OsString -> OsString -> (OsString, OsString) +breakSubstring = coerce PF.breakSubstring + +-- | /O(n)/ 'elem' is the 'OsString' membership predicate. +-- +-- @since 1.4.200.0 +elem :: OsChar -> OsString -> Bool +elem = coerce PF.elem + +-- | /O(n)/ The 'find' function takes a predicate and a OsString, +-- and returns the first element matching the predicate, or 'Nothing' +-- if there is no such element. +-- +-- > find f p = case findIndex f p of Just n -> Just (p `index`
n) ; _ -> Nothing +-- +-- @since 1.4.200.0 +find :: (OsChar -> Bool) -> OsString -> Maybe OsChar +find = coerce PF.find + +-- | /O(n)/ 'filter', applied to a predicate and a OsString, +-- returns a OsString containing those characters that satisfy the +-- predicate. +-- +-- @since 1.4.200.0 +filter :: (OsChar -> Bool) -> OsString -> OsString +filter = coerce PF.filter + +-- | /O(n)/ The 'partition' function takes a predicate and a OsString and returns +-- the pair of OsStrings with elements which do and do not satisfy the +-- predicate, respectively; i.e., +-- +-- > partition p bs == (filter p bs, filter (not . p) bs) +-- +-- @since 1.4.200.0 +partition :: (OsChar -> Bool) -> OsString -> (OsString, OsString) +partition = coerce PF.partition + +-- | /O(1)/ 'OsString' index (subscript) operator, starting from 0. +-- +-- @since 1.4.200.0 +index :: HasCallStack => OsString -> Int -> OsChar +index = coerce PF.index + +-- | /O(1)/ 'OsString' index, starting from 0, that returns 'Just' if: +-- +-- > 0 <= n < length bs +-- +-- @since 1.4.200.0 +indexMaybe :: OsString -> Int -> Maybe OsChar +indexMaybe = coerce PF.indexMaybe + +-- | /O(1)/ 'OsString' index, starting from 0, that returns 'Just' if: +-- +-- > 0 <= n < length bs +-- +-- @since 1.4.200.0 +(!?) :: OsString -> Int -> Maybe OsChar +(!?) = indexMaybe + +-- | /O(n)/ The 'elemIndex' function returns the index of the first +-- element in the given 'OsString' which is equal to the query +-- element, or 'Nothing' if there is no such element. +-- +-- @since 1.4.200.0 +elemIndex :: OsChar -> OsString -> Maybe Int +elemIndex = coerce PF.elemIndex + +-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning +-- the indices of all elements equal to the query element, in ascending order.
+-- +-- @since 1.4.200.0 +elemIndices :: OsChar -> OsString -> [Int] +elemIndices = coerce PF.elemIndices + +-- | count returns the number of times its argument appears in the OsString +-- +-- @since 1.4.200.0 +count :: OsChar -> OsString -> Int +count = coerce PF.count + +-- | /O(n)/ The 'findIndex' function takes a predicate and a 'OsString' and +-- returns the index of the first element in the OsString +-- satisfying the predicate. +-- +-- @since 1.4.200.0 +findIndex :: (OsChar -> Bool) -> OsString -> Maybe Int +findIndex = coerce PF.findIndex + +-- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the +-- indices of all elements satisfying the predicate, in ascending order. +-- +-- @since 1.4.200.0 +findIndices :: (OsChar -> Bool) -> OsString -> [Int] +findIndices = coerce PF.findIndices + diff --git a/changelog.md b/changelog.md index a5bca439..9d4bb126 100644 --- a/changelog.md +++ b/changelog.md @@ -2,6 +2,10 @@ _Note: below all `FilePath` values are unquoted, so `\\` really means two backslashes._ +## 1.4.200.0 *??? 2023* + +* Introduce bytestring-like functions (substrings, predicates, searching, etc.) 
to `System.OsString`, `System.OsString.Windows` and `System.OsString.Posix` + ## 1.4.100.4 *Jul 2023* * Fix isInfixOf and breakSubString in Word16, wrt [#195](https://github.com/haskell/filepath/issues/195) diff --git a/filepath.cabal b/filepath.cabal index c3b76370..ae4eba4f 100644 --- a/filepath.cabal +++ b/filepath.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: filepath -version: 1.4.100.4 +version: 1.4.200.0 -- NOTE: Don't forget to update ./changelog.md license: BSD-3-Clause @@ -158,6 +158,9 @@ test-suite bytestring-tests hs-source-dirs: tests tests/bytestring-tests other-modules: Properties.ShortByteString + Properties.WindowsString + Properties.PosixString + Properties.OsString Properties.ShortByteString.Word16 TestUtil diff --git a/tests/abstract-filepath/OsPathSpec.hs b/tests/abstract-filepath/OsPathSpec.hs index bee6fb57..8f334516 100644 --- a/tests/abstract-filepath/OsPathSpec.hs +++ b/tests/abstract-filepath/OsPathSpec.hs @@ -14,8 +14,8 @@ import System.OsPath.Windows as Windows import System.OsPath.Encoding import qualified System.OsString.Internal.Types as OS import System.OsPath.Data.ByteString.Short ( toShort ) -import System.OsString.Posix as PosixS -import System.OsString.Windows as WindowsS +import System.OsString.Posix as PosixS hiding (map) +import System.OsString.Windows as WindowsS hiding (map) import Control.Exception import Data.ByteString ( ByteString ) diff --git a/tests/bytestring-tests/Main.hs b/tests/bytestring-tests/Main.hs index a37e79a9..ae8015a7 100644 --- a/tests/bytestring-tests/Main.hs +++ b/tests/bytestring-tests/Main.hs @@ -2,9 +2,12 @@ module Main (main) where +import qualified Properties.OsString as PropOs +import qualified Properties.PosixString as PropPos +import qualified Properties.WindowsString as PropWin import qualified Properties.ShortByteString as PropSBS import qualified Properties.ShortByteString.Word16 as PropSBSW16 import TestUtil main :: IO () -main = runTests (PropSBS.tests ++ PropSBSW16.tests) +main = 
runTests (PropSBS.tests ++ PropSBSW16.tests ++ PropWin.tests ++ PropPos.tests ++ PropOs.tests) diff --git a/tests/bytestring-tests/Properties/Common.hs b/tests/bytestring-tests/Properties/Common.hs index c5ef566a..77554be3 100644 --- a/tests/bytestring-tests/Properties/Common.hs +++ b/tests/bytestring-tests/Properties/Common.hs @@ -5,6 +5,10 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# OPTIONS_GHC -Wno-orphans #-} -- We are happy to sacrifice optimizations in exchange for faster compilation, @@ -15,20 +19,46 @@ -fmax-simplifier-iterations=1 -fsimplifier-phases=0 -fno-call-arity -fno-case-merge -fno-cmm-elim-common-blocks -fno-cmm-sink -fno-cpr-anal -fno-cse -fno-do-eta-reduction -fno-float-in -fno-full-laziness - -fno-loopification -fno-specialise -fno-strictness #-} + -fno-loopification -fno-specialise -fno-strictness -Wno-unused-imports -Wno-unused-top-binds #-} + +#ifdef OSWORD +module Properties.OsString (tests) where +import System.OsString.Internal.Types (OsString(..), OsChar(..), getOsChar) +import qualified System.OsString as B +import qualified System.OsString as BS +import qualified System.OsPath.Data.ByteString.Short.Internal as BSI (_nul, isSpace) + +#else #ifdef WORD16 +#ifdef WIN +module Properties.WindowsString (tests) where +import qualified System.OsString.Windows as B +import qualified System.OsString.Windows as BS +#else module Properties.ShortByteString.Word16 (tests) where import System.OsPath.Data.ByteString.Short.Internal (_nul, isSpace) import qualified System.OsPath.Data.ByteString.Short.Word16 as B import qualified System.OsPath.Data.ByteString.Short as BS +#endif +#else +#ifdef POSIX +module Properties.PosixString (tests) where +import qualified System.OsString.Posix as B +import qualified System.OsString.Posix as BS #else module Properties.ShortByteString (tests) where import 
qualified System.OsPath.Data.ByteString.Short as B -import qualified Data.Char as C #endif +#endif +#endif + import Data.ByteString.Short (ShortByteString) +import qualified Data.Char as C +import qualified System.OsPath.Data.ByteString.Short.Word16 as B16 +import qualified System.OsPath.Data.ByteString.Short as B8 + import Data.Word import Control.Arrow @@ -40,7 +70,157 @@ import Test.QuickCheck import Test.QuickCheck.Monadic ( monadicIO, run ) import Text.Show.Functions () +import System.OsString.Internal.Types (WindowsString(..), WindowsChar(..), getWindowsChar, PosixChar(..), PosixString(..), getPosixChar, OsString(..), OsChar(..), getOsChar) +import qualified System.OsString.Posix as PBS +import qualified System.OsString.Windows as WBS +import qualified System.OsString as OBS +import qualified System.OsPath.Data.ByteString.Short.Internal as BSI (_nul, isSpace) + + +instance Arbitrary PosixString where + arbitrary = do + bs <- sized sizedByteString' + n <- choose (0, 2) + return (PBS.drop n bs) -- to give us some with non-0 offset + where + sizedByteString' :: Int -> Gen PosixString + sizedByteString' n = do m <- choose(0, n) + fmap (PosixString . B8.pack) $ vectorOf m arbitrary + +instance Arbitrary PosixChar where + arbitrary = fmap PosixChar (arbitrary @Word8) + +instance CoArbitrary PosixChar where + coarbitrary s = coarbitrary (PBS.toChar s) + +instance CoArbitrary PosixString where + coarbitrary s = coarbitrary (PBS.unpack s) + +deriving instance Num PosixChar + +deriving instance Bounded PosixChar + +instance Arbitrary WindowsString where + arbitrary = do + bs <- sized sizedByteString' + n <- choose (0, 2) + return (WBS.drop n bs) -- to give us some with non-0 offset + where + sizedByteString' :: Int -> Gen WindowsString + sizedByteString' n = do m <- choose(0, n) + fmap (WindowsString . 
B16.pack) $ vectorOf m arbitrary + +instance Arbitrary WindowsChar where + arbitrary = fmap WindowsChar (arbitrary @Word16) + +instance CoArbitrary WindowsChar where + coarbitrary s = coarbitrary (WBS.toChar s) + +instance CoArbitrary WindowsString where + coarbitrary s = coarbitrary (WBS.unpack s) + +deriving instance Num WindowsChar + +deriving instance Bounded WindowsChar + +isSpaceWin :: WindowsChar -> Bool +isSpaceWin = BSI.isSpace . getWindowsChar + +numWordWin :: WindowsString -> Int +numWordWin = B16.numWord16 . getWindowsString + + +swapWWin :: WindowsChar -> WindowsChar +swapWWin = WindowsChar . byteSwap16 . getWindowsChar + +isSpacePosix :: PosixChar -> Bool +isSpacePosix = C.isSpace . word8ToChar . getPosixChar + +numWordPosix :: PosixString -> Int +numWordPosix = B8.length . getPosixString + + +swapWPosix :: PosixChar -> PosixChar +swapWPosix = id + +#ifdef OSWORD +isSpace :: OsChar -> Bool +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +isSpace = isSpaceWin . getOsChar +#else +isSpace = isSpacePosix . getOsChar +#endif + +numWord :: OsString -> Int +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +numWord = numWordWin . getOsString +#else +numWord = numWordPosix . getOsString +#endif + +toElem :: OsChar -> OsChar +toElem = id + +swapW :: OsChar -> OsChar +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +swapW = OsChar . swapWWin . getOsChar +#else +swapW = OsChar . swapWPosix . 
getOsChar +#endif + +instance Arbitrary OsString where + arbitrary = OsString <$> arbitrary + +instance Arbitrary OsChar where + arbitrary = OsChar <$> arbitrary + +instance CoArbitrary OsChar where + coarbitrary s = coarbitrary (OBS.toChar s) + +instance CoArbitrary OsString where + coarbitrary s = coarbitrary (OBS.unpack s) + +deriving instance Num OsChar +deriving instance Bounded OsChar + +instance Arbitrary ShortByteString where +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) + arbitrary = getWindowsString <$> arbitrary +#else + arbitrary = getPosixString <$> arbitrary +#endif + +#else + #ifdef WORD16 + +instance Arbitrary ShortByteString where + arbitrary = do + bs <- sized sizedByteString + n <- choose (0, 2) + return (B16.drop n bs) -- to give us some with non-0 offset + where + sizedByteString :: Int -> Gen ShortByteString + sizedByteString n = do m <- choose(0, n) + fmap B16.pack $ vectorOf m arbitrary + +instance CoArbitrary ShortByteString where + coarbitrary s = coarbitrary (B16.unpack s) +#ifdef WIN + +isSpace :: WindowsChar -> Bool +isSpace = isSpaceWin + +numWord :: WindowsString -> Int +numWord = numWordWin + +toElem :: WindowsChar -> WindowsChar +toElem = id + +swapW :: WindowsChar -> WindowsChar +swapW = swapWWin + +#else numWord :: ShortByteString -> Int numWord = B.numWord16 @@ -50,18 +230,22 @@ toElem = id swapW :: Word16 -> Word16 swapW = byteSwap16 -sizedByteString :: Int -> Gen ShortByteString -sizedByteString n = do m <- choose(0, n) - fmap B.pack $ vectorOf m arbitrary -instance Arbitrary ShortByteString where - arbitrary = do - bs <- sized sizedByteString - n <- choose (0, 2) - return (B.drop n bs) -- to give us some with non-0 offset +#endif +#else +#ifdef POSIX -instance CoArbitrary ShortByteString where - coarbitrary s = coarbitrary (B.unpack s) +isSpace :: PosixChar -> Bool +isSpace = isSpacePosix + +numWord :: PosixString -> Int +numWord = numWordPosix + +toElem :: PosixChar -> PosixChar +toElem = id + +swapW :: PosixChar -> 
PosixChar +swapW = swapWPosix #else _nul :: Word8 @@ -70,12 +254,9 @@ _nul = 0x00 isSpace :: Word8 -> Bool isSpace = C.isSpace . word8ToChar --- | Total conversion to char. -word8ToChar :: Word8 -> Char -word8ToChar = C.chr . fromIntegral numWord :: ShortByteString -> Int -numWord = B.length +numWord = B8.length toElem :: Word8 -> Word8 toElem = id @@ -84,20 +265,23 @@ swapW :: Word8 -> Word8 swapW = id -sizedByteString :: Int -> Gen ShortByteString -sizedByteString n = do m <- choose(0, n) - fmap B.pack $ vectorOf m arbitrary + +#endif instance Arbitrary ShortByteString where arbitrary = do - bs <- sized sizedByteString + bs <- sized sizedByteString' n <- choose (0, 2) - return (B.drop n bs) -- to give us some with non-0 offset - shrink = map B.pack . shrink . B.unpack + return (B8.drop n bs) -- to give us some with non-0 offset + where + sizedByteString' :: Int -> Gen ShortByteString + sizedByteString' n = do m <- choose(0, n) + fmap B8.pack $ vectorOf m arbitrary + shrink = map B8.pack . shrink . 
B8.unpack instance CoArbitrary ShortByteString where - coarbitrary s = coarbitrary (B.unpack s) - + coarbitrary s = coarbitrary (B8.unpack s) +#endif #endif @@ -132,7 +316,7 @@ tests = , ("compare LT empty", property $ \x -> not (B.null x) ==> compare B.empty x == LT) , ("compare GT concat", - property $ \x y -> not (B.null y) ==> compare (x <> y) x == GT) + property $ \x y -> not (B.null y) ==> compare (x `mappend` y) x == GT) , ("compare char" , property $ \(toElem -> c) (toElem -> d) -> compare (swapW c) (swapW d) == compare (B.singleton c) (B.singleton d)) , ("compare unsigned", @@ -150,6 +334,16 @@ tests = once $ B.unpack mempty === []) #ifdef WORD16 +#ifdef WIN + , ("isInfixOf works correctly under UTF16", + once $ + let foo = WindowsString $ B8.pack [0xbb, 0x03] + foo' = WindowsString $ B8.pack [0xd2, 0xbb] + bar = WindowsString $ B8.pack [0xd2, 0xbb, 0x03, 0xad] + bar' = WindowsString $ B8.pack [0xd2, 0xbb, 0x03, 0xad, 0xd2, 0xbb, 0x03, 0xad, 0xbb, 0x03, 0x00, 0x00] + in [B.isInfixOf foo bar, B.isInfixOf foo' bar, B.isInfixOf foo bar'] === [False, True, True] + ) +#else , ("isInfixOf works correctly under UTF16", once $ let foo = BS.pack [0xbb, 0x03] @@ -158,6 +352,7 @@ tests = bar' = BS.pack [0xd2, 0xbb, 0x03, 0xad, 0xd2, 0xbb, 0x03, 0xad, 0xbb, 0x03, 0x00, 0x00] in [B.isInfixOf foo bar, B.isInfixOf foo' bar, B.isInfixOf foo bar'] === [False, True, True] ) +#endif #endif , ("break breakSubstring", property $ \(toElem -> c) x -> B.break (== c) x === B.breakSubstring (B.singleton c) x @@ -193,7 +388,7 @@ tests = , ("mappend" , property $ \x y -> B.unpack (mappend x y) === B.unpack x `mappend` B.unpack y) , ("<>" , - property $ \x y -> B.unpack (x <> y) === B.unpack x <> B.unpack y) + property $ \x y -> B.unpack (x `mappend` y) === B.unpack x `mappend` B.unpack y) , ("stimes" , property $ \(Positive n) x -> stimes (n :: Int) (x :: ShortByteString) === mtimesDefault n x) @@ -407,14 +602,15 @@ tests = -- property $ \n f (toElem -> a) -> B.unpack (B.take 
(fromIntegral n) (B.unfoldr (fmap (first toElem) . f) a)) === -- take n (unfoldr (fmap (first toElem) . f) a)) -- -#ifdef WORD16 +#if defined(WORD16) && !defined(WIN) && !defined(OSWORD) && !defined(POSIX) , ("useAsCWString str packCWString == str" , property $ \x -> not (B.any (== _nul) x) ==> monadicIO $ run (B.useAsCWString x B.packCWString >>= \x' -> pure (x == x'))) , ("useAsCWStringLen str packCWStringLen == str" , property $ \x -> not (B.any (== _nul) x) ==> monadicIO $ run (B.useAsCWStringLen x B.packCWStringLen >>= \x' -> pure (x == x'))) -#else +#endif +#if !defined(WORD16) && !defined(WIN) && !defined(OSWORD) && !defined(POSIX) , ("useAsCString str packCString == str" , property $ \x -> not (B.any (== _nul) x) ==> monadicIO $ run (B.useAsCString x B.packCString >>= \x' -> pure (x == x'))) @@ -439,3 +635,7 @@ splitWith f ys = go [] ys unsnoc :: [a] -> Maybe ([a], a) unsnoc [] = Nothing unsnoc xs = Just (init xs, last xs) + +-- | Total conversion to char. +word8ToChar :: Word8 -> Char +word8ToChar = C.chr . 
fromIntegral diff --git a/tests/bytestring-tests/Properties/OsString.hs b/tests/bytestring-tests/Properties/OsString.hs new file mode 100644 index 00000000..e81348b7 --- /dev/null +++ b/tests/bytestring-tests/Properties/OsString.hs @@ -0,0 +1,7 @@ +{-# LANGUAGE CPP #-} +#undef WORD16 +#undef POSIX +#undef WIN +#define OSWORD +#include "Common.hs" + diff --git a/tests/bytestring-tests/Properties/PosixString.hs b/tests/bytestring-tests/Properties/PosixString.hs new file mode 100644 index 00000000..e0b9d981 --- /dev/null +++ b/tests/bytestring-tests/Properties/PosixString.hs @@ -0,0 +1,7 @@ +{-# LANGUAGE CPP #-} +#undef WORD16 +#define POSIX +#undef WIN +#undef OSWORD +#include "Common.hs" + diff --git a/tests/bytestring-tests/Properties/ShortByteString.hs b/tests/bytestring-tests/Properties/ShortByteString.hs index 3040dfb8..97c91090 100644 --- a/tests/bytestring-tests/Properties/ShortByteString.hs +++ b/tests/bytestring-tests/Properties/ShortByteString.hs @@ -1,3 +1,7 @@ {-# LANGUAGE CPP #-} #undef WORD16 +#undef WIN +#undef POSIX +#undef OSWORD #include "Common.hs" + diff --git a/tests/bytestring-tests/Properties/ShortByteString/Word16.hs b/tests/bytestring-tests/Properties/ShortByteString/Word16.hs index aa426397..d604ef97 100644 --- a/tests/bytestring-tests/Properties/ShortByteString/Word16.hs +++ b/tests/bytestring-tests/Properties/ShortByteString/Word16.hs @@ -1,3 +1,6 @@ {-# LANGUAGE CPP #-} #define WORD16 +#undef WIN +#undef POSIX +#undef OSWORD #include "../Common.hs" diff --git a/tests/bytestring-tests/Properties/WindowsString.hs b/tests/bytestring-tests/Properties/WindowsString.hs new file mode 100644 index 00000000..1ce96b04 --- /dev/null +++ b/tests/bytestring-tests/Properties/WindowsString.hs @@ -0,0 +1,7 @@ +{-# LANGUAGE CPP #-} +#define WORD16 +#define WIN +#undef POSIX +#undef OSWORD +#include "Common.hs" +