Skip to content

Commit

Permalink
Merge remote-tracking branch 'github/pr/199'
Browse files Browse the repository at this point in the history
  • Loading branch information
hasufell committed Jul 13, 2023
2 parents 61be653 + 094c9e1 commit b91dcf2
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 2 deletions.
34 changes: 34 additions & 0 deletions System/OsPath/Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE UnliftedFFITypes #-}

-- |
-- Module : System.OsPath.Data.ByteString.Short.Internal
Expand All @@ -25,6 +26,13 @@ import Data.ByteString.Short.Internal (ShortByteString(..), length)
#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup
( Semigroup((<>)) )
import Foreign.C.Types
( CSize(..)
, CInt(..)
)
import Data.ByteString.Internal
( accursedUnutterablePerformIO
)
#endif
#if !MIN_VERSION_bytestring(0,10,9)
import Foreign.Marshal.Alloc (allocaBytes)
Expand Down Expand Up @@ -441,3 +449,29 @@ errorEmptySBS fun = moduleError fun "empty ShortByteString"
moduleError :: HasCallStack => String -> String -> a
moduleError fun msg = error (moduleErrorMsg fun msg)
{-# NOINLINE moduleError #-}

compareByteArraysOff :: BA -- ^ array 1
-> Int -- ^ offset for array 1
-> BA -- ^ array 2
-> Int -- ^ offset for array 2
-> Int -- ^ length to compare
-> Int -- ^ like memcmp
#if MIN_VERSION_base(4,11,0)
compareByteArraysOff (BA# ba1#) (I# ba1off#) (BA# ba2#) (I# ba2off#) (I# len#) =
I# (compareByteArrays# ba1# ba1off# ba2# ba2off# len#)
#else
compareByteArraysOff (BA# ba1#) ba1off (BA# ba2#) ba2off len =
assert (ba1off + len <= (I# (sizeofByteArray# ba1#)))
$ assert (ba2off + len <= (I# (sizeofByteArray# ba2#)))
$ fromIntegral $ accursedUnutterablePerformIO $
c_memcmp_ByteArray ba1#
ba1off
ba2#
ba2off
(fromIntegral len)


foreign import ccall unsafe "static sbs_memcmp_off"
c_memcmp_ByteArray :: ByteArray# -> Int -> ByteArray# -> Int -> CSize -> IO CInt
#endif

30 changes: 28 additions & 2 deletions System/OsPath/Data/ByteString/Short/Word16.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}

{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-}

Expand Down Expand Up @@ -143,10 +145,11 @@ module System.OsPath.Data.ByteString.Short.Word16 (
useAsCWStringLen
)
where
import System.OsPath.Data.ByteString.Short ( append, intercalate, concat, stripSuffix, stripPrefix, isInfixOf, isPrefixOf, isSuffixOf, breakSubstring, length, empty, null, ShortByteString(..), fromShort, toShort )
import System.OsPath.Data.ByteString.Short ( append, intercalate, concat, stripSuffix, stripPrefix, isPrefixOf, isSuffixOf, length, empty, null, ShortByteString(..), fromShort, toShort )
import System.OsPath.Data.ByteString.Short.Internal
import Data.Bits
( shiftR )
( shiftR
)
import Data.Word
import Prelude hiding
( Foldable(..)
Expand All @@ -172,6 +175,7 @@ import Prelude hiding
import qualified Data.Foldable as Foldable
import GHC.ST ( ST )
import GHC.Stack ( HasCallStack )
import GHC.Exts ( inline )

import qualified Data.ByteString.Short.Internal as BS
import qualified Data.List as List
Expand Down Expand Up @@ -647,6 +651,28 @@ splitWith p = \(assertEven -> sbs) -> if
| otherwise -> a : go (tail b)


-- | Check whether one string is a substring of another.
isInfixOf :: ShortByteString -> ShortByteString -> Bool
isInfixOf sbs = \s -> null sbs || not (null $ snd $ GHC.Exts.inline breakSubstring sbs s)


-- algorithm: https://github.com/haskell/filepath/issues/195#issuecomment-1605633713
breakSubstring :: ShortByteString -- ^ String to search for
-> ShortByteString -- ^ String to search in
-> (ShortByteString, ShortByteString) -- ^ Head and tail of string broken at substring
breakSubstring bPat@(asBA -> pat) bInp@(asBA -> inp) = go 0
where
lpat = BS.length bPat
linp = BS.length bInp
go ix
| let ix' = ix * 2
, linp >= ix' + lpat =
if | compareByteArraysOff pat 0 inp ix' lpat == 0 -> splitAt ix bInp
| otherwise -> go (ix + 1)
| otherwise
= (bInp, mempty)


-- ---------------------------------------------------------------------
-- Reducing 'ByteString's

Expand Down
23 changes: 23 additions & 0 deletions tests/bytestring-tests/Properties/Common.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
module Properties.ShortByteString.Word16 (tests) where
import System.OsPath.Data.ByteString.Short.Internal (_nul, isSpace)
import qualified System.OsPath.Data.ByteString.Short.Word16 as B
import qualified System.OsPath.Data.ByteString.Short as BS
#else
module Properties.ShortByteString (tests) where
import qualified System.OsPath.Data.ByteString.Short as B
Expand Down Expand Up @@ -148,6 +149,28 @@ tests =
, ("mempty []",
once $ B.unpack mempty === [])

#ifdef WORD16
, ("isInfixOf works correctly under UTF16",
once $
let foo = BS.pack [0xbb, 0x03]
foo' = BS.pack [0xd2, 0xbb]
bar = BS.pack [0xd2, 0xbb, 0x03, 0xad]
bar' = BS.pack [0xd2, 0xbb, 0x03, 0xad, 0xd2, 0xbb, 0x03, 0xad, 0xbb, 0x03, 0x00, 0x00]
in [B.isInfixOf foo bar, B.isInfixOf foo' bar, B.isInfixOf foo bar'] === [False, True, True]
)
#endif
, ("break breakSubstring",
property $ \(toElem -> c) x -> B.break (== c) x === B.breakSubstring (B.singleton c) x
)
, ("breakSubstring",
property $ \x y -> not (B.null x) ==> B.null (snd (B.breakSubstring x y)) === not (B.isInfixOf x y)
)
, ("breakSubstring empty",
property $ \x -> B.breakSubstring B.empty x === (B.empty, x)
)
, ("isInfixOf",
property $ \x y -> B.isInfixOf x y === L.isInfixOf (B.unpack x) (B.unpack y))

, ("mconcat" ,
property $ \xs -> B.unpack (mconcat xs) === mconcat (map B.unpack xs))
, ("mconcat [x,x]" ,
Expand Down

0 comments on commit b91dcf2

Please sign in to comment.