From 53d663e00a16863aae0e133795237e463783f889 Mon Sep 17 00:00:00 2001 From: Clint Adams Date: Sat, 27 Jul 2019 19:22:18 -0400 Subject: [PATCH] Import haskell-unordered-containers_0.2.10.0.orig.tar.gz [dgit import orig haskell-unordered-containers_0.2.10.0.orig.tar.gz] --- CHANGES.md | 81 ++ Data/HashMap/Array.hs | 583 ++++++++++ Data/HashMap/Base.hs | 1895 +++++++++++++++++++++++++++++++++ Data/HashMap/Lazy.hs | 103 ++ Data/HashMap/List.hs | 66 ++ Data/HashMap/Strict.hs | 96 ++ Data/HashMap/Strict/Base.hs | 671 ++++++++++++ Data/HashMap/Unsafe.hs | 39 + Data/HashMap/UnsafeShift.hs | 16 + Data/HashSet.hs | 72 ++ Data/HashSet/Base.hs | 327 ++++++ LICENSE | 30 + Setup.hs | 2 + benchmarks/Benchmarks.hs | 453 ++++++++ benchmarks/Util/ByteString.hs | 29 + benchmarks/Util/Int.hs | 19 + benchmarks/Util/String.hs | 34 + tests/HashMapProperties.hs | 463 ++++++++ tests/HashSetProperties.hs | 248 +++++ tests/List.hs | 68 ++ tests/Regressions.hs | 89 ++ tests/Strictness.hs | 192 ++++ unordered-containers.cabal | 218 ++++ 23 files changed, 5794 insertions(+) create mode 100644 CHANGES.md create mode 100644 Data/HashMap/Array.hs create mode 100644 Data/HashMap/Base.hs create mode 100644 Data/HashMap/Lazy.hs create mode 100644 Data/HashMap/List.hs create mode 100644 Data/HashMap/Strict.hs create mode 100644 Data/HashMap/Strict/Base.hs create mode 100644 Data/HashMap/Unsafe.hs create mode 100644 Data/HashMap/UnsafeShift.hs create mode 100644 Data/HashSet.hs create mode 100644 Data/HashSet/Base.hs create mode 100644 LICENSE create mode 100644 Setup.hs create mode 100644 benchmarks/Benchmarks.hs create mode 100644 benchmarks/Util/ByteString.hs create mode 100644 benchmarks/Util/Int.hs create mode 100644 benchmarks/Util/String.hs create mode 100644 tests/HashMapProperties.hs create mode 100644 tests/HashSetProperties.hs create mode 100644 tests/List.hs create mode 100644 tests/Regressions.hs create mode 100644 tests/Strictness.hs create mode 100644 unordered-containers.cabal diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..1b1148f --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,81 @@ +## 0.2.10.0 + + * Add `HashMap.alterF`. + + * Add `HashMap.keysSet`. + + * Make `HashMap.Strict.traverseWithKey` force the results before + installing them in the map. + +## 0.2.9.0 + + * Add `Ord/Ord1/Ord2` instances. (Thanks, Oleg Grenrus) + + * Use `SmallArray#` instead of `Array#` for GHC versions 7.10 and above. + (Thanks, Dmitry Ivanov) + + * Adjust for `Semigroup => Monoid` proposal implementation. + (Thanks, Ryan Scott) + +### Bug fixes + + * Fix a strictness bug in `fromListWith`. + + * Enable eager blackholing for pre-8.2 GHC versions to work around + a runtime system bug. (Thanks, Ben Gamari) + + * Avoid sketchy reimplementation of `ST` when compiling with recent + GHC. + +### Other changes + + * Remove support for GHC versions before 7.8. (Thanks, Dmitry Ivanov) + + * Add internal documentaton. (Thanks, Johan Tibell) + +## 0.2.8.0 + + * Add `Eq1/2`, `Show1/2`, `Read1` instances with `base-4.9` + + * `Eq (HashSet a)` doesn't require `Hashable a` anymore, only `Eq a`. + + * Add `Hashable1/2` with `hashable-1.2.6.0` + + * Add `differenceWith` function. + +## 0.2.7.2 + + * Don't use -fregs-graphs + + * Fix benchmark compilation on stack. + +## 0.2.7.1 + + * Fix linker error related to popcnt. + + * Haddock improvements. + + * Fix benchmark compilation when downloaded from Hackage. + +## 0.2.7.0 + + * Support criterion 1.1 + + * Add unionWithKey for hash maps. + +## 0.2.6.0 + + * Mark several modules as Trustworthy. + + * Add Hashable instances for HashMap and HashSet. + + * Add mapMaybe, mapMaybeWithKey, update, alter, and + intersectionWithKey. + + * Add roles. + + * Add Hashable and Semigroup instances. + +## 0.2.5.1 (2014-10-11) + + * Support base-4.8 diff --git a/Data/HashMap/Array.hs b/Data/HashMap/Array.hs new file mode 100644 index 0000000..0149da4 --- /dev/null +++ b/Data/HashMap/Array.hs @@ -0,0 +1,583 @@ +{-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types, UnboxedTuples, ScopedTypeVariables #-} +{-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} + +-- | Zero based arrays. +-- +-- Note that no bounds checking are performed. +module Data.HashMap.Array + ( Array + , MArray + + -- * Creation + , new + , new_ + , singleton + , singletonM + , pair + + -- * Basic interface + , length + , lengthM + , read + , write + , index + , indexM + , index# + , update + , updateWith' + , unsafeUpdateM + , insert + , insertM + , delete + , sameArray1 + , trim + + , unsafeFreeze + , unsafeThaw + , unsafeSameArray + , run + , run2 + , copy + , copyM + + -- * Folds + , foldl' + , foldr + + , thaw + , map + , map' + , traverse + , traverse' + , toList + , fromList + ) where + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative (Applicative (..), (<$>)) +#endif +import Control.Applicative (liftA2) +import Control.DeepSeq +import GHC.Exts(Int(..), Int#, reallyUnsafePtrEquality#, tagToEnum#, unsafeCoerce#, State#) +import GHC.ST (ST(..)) +import Control.Monad.ST (stToIO) + +#if __GLASGOW_HASKELL__ >= 709 +import Prelude hiding (filter, foldr, length, map, read, traverse) +#else +import Prelude hiding (filter, foldr, length, map, read) +#endif + +#if __GLASGOW_HASKELL__ >= 710 +import GHC.Exts (SmallArray#, newSmallArray#, readSmallArray#, writeSmallArray#, + indexSmallArray#, unsafeFreezeSmallArray#, unsafeThawSmallArray#, + SmallMutableArray#, sizeofSmallArray#, copySmallArray#, thawSmallArray#, + sizeofSmallMutableArray#, copySmallMutableArray#, cloneSmallMutableArray#) + +#else +import GHC.Exts (Array#, newArray#, readArray#, writeArray#, + indexArray#, unsafeFreezeArray#, unsafeThawArray#, + MutableArray#, sizeofArray#, copyArray#, thawArray#, + sizeofMutableArray#, copyMutableArray#, cloneMutableArray#) +#endif + +#if defined(ASSERTS) +import qualified Prelude +#endif + +import Data.HashMap.Unsafe (runST) +import Control.Monad ((>=>)) + + +#if __GLASGOW_HASKELL__ >= 710 +type Array# a = SmallArray# a +type MutableArray# a = SmallMutableArray# a + +newArray# :: Int# -> a -> State# d -> (# State# d, SmallMutableArray# d a #) +newArray# = newSmallArray# + +unsafeFreezeArray# :: SmallMutableArray# d a + -> State# d -> (# State# d, SmallArray# a #) +unsafeFreezeArray# = unsafeFreezeSmallArray# + +readArray# :: SmallMutableArray# d a + -> Int# -> State# d -> (# State# d, a #) +readArray# = readSmallArray# + +writeArray# :: SmallMutableArray# d a + -> Int# -> a -> State# d -> State# d +writeArray# = writeSmallArray# + +indexArray# :: SmallArray# a -> Int# -> (# a #) +indexArray# = indexSmallArray# + +unsafeThawArray# :: SmallArray# a + -> State# d -> (# State# d, SmallMutableArray# d a #) +unsafeThawArray# = unsafeThawSmallArray# + +sizeofArray# :: SmallArray# a -> Int# +sizeofArray# = sizeofSmallArray# + +copyArray# :: SmallArray# a + -> Int# + -> SmallMutableArray# d a + -> Int# + -> Int# + -> State# d + -> State# d +copyArray# = copySmallArray# + +cloneMutableArray# :: SmallMutableArray# s a + -> Int# + -> Int# + -> State# s + -> (# State# s, SmallMutableArray# s a #) +cloneMutableArray# = cloneSmallMutableArray# + +thawArray# :: SmallArray# a + -> Int# + -> Int# + -> State# d + -> (# State# d, SmallMutableArray# d a #) +thawArray# = thawSmallArray# + +sizeofMutableArray# :: SmallMutableArray# s a -> Int# +sizeofMutableArray# = sizeofSmallMutableArray# + +copyMutableArray# :: SmallMutableArray# d a + -> Int# + -> SmallMutableArray# d a + -> Int# + -> Int# + -> State# d + -> State# d +copyMutableArray# = copySmallMutableArray# +#endif + +------------------------------------------------------------------------ + +#if defined(ASSERTS) +-- This fugly hack is brought by GHC's apparent reluctance to deal +-- with MagicHash and UnboxedTuples when inferring types. Eek! +# define CHECK_BOUNDS(_func_,_len_,_k_) \ +if (_k_) < 0 || (_k_) >= (_len_) then error ("Data.HashMap.Array." ++ (_func_) ++ ": bounds error, offset " ++ show (_k_) ++ ", length " ++ show (_len_)) else +# define CHECK_OP(_func_,_op_,_lhs_,_rhs_) \ +if not ((_lhs_) _op_ (_rhs_)) then error ("Data.HashMap.Array." ++ (_func_) ++ ": Check failed: _lhs_ _op_ _rhs_ (" ++ show (_lhs_) ++ " vs. " ++ show (_rhs_) ++ ")") else +# define CHECK_GT(_func_,_lhs_,_rhs_) CHECK_OP(_func_,>,_lhs_,_rhs_) +# define CHECK_LE(_func_,_lhs_,_rhs_) CHECK_OP(_func_,<=,_lhs_,_rhs_) +# define CHECK_EQ(_func_,_lhs_,_rhs_) CHECK_OP(_func_,==,_lhs_,_rhs_) +#else +# define CHECK_BOUNDS(_func_,_len_,_k_) +# define CHECK_OP(_func_,_op_,_lhs_,_rhs_) +# define CHECK_GT(_func_,_lhs_,_rhs_) +# define CHECK_LE(_func_,_lhs_,_rhs_) +# define CHECK_EQ(_func_,_lhs_,_rhs_) +#endif + +data Array a = Array { + unArray :: !(Array# a) + } + +instance Show a => Show (Array a) where + show = show . toList + +-- Determines whether two arrays have the same memory address. +-- This is more reliable than testing pointer equality on the +-- Array wrappers, but it's still slightly bogus. +unsafeSameArray :: Array a -> Array b -> Bool +unsafeSameArray (Array xs) (Array ys) = + tagToEnum# (unsafeCoerce# reallyUnsafePtrEquality# xs ys) + +sameArray1 :: (a -> b -> Bool) -> Array a -> Array b -> Bool +sameArray1 eq !xs0 !ys0 + | lenxs /= lenys = False + | otherwise = go 0 xs0 ys0 + where + go !k !xs !ys + | k == lenxs = True + | (# x #) <- index# xs k + , (# y #) <- index# ys k + = eq x y && go (k + 1) xs ys + + !lenxs = length xs0 + !lenys = length ys0 + +length :: Array a -> Int +length ary = I# (sizeofArray# (unArray ary)) +{-# INLINE length #-} + +-- | Smart constructor +array :: Array# a -> Int -> Array a +array ary _n = Array ary +{-# INLINE array #-} + +data MArray s a = MArray { + unMArray :: !(MutableArray# s a) + } + +lengthM :: MArray s a -> Int +lengthM mary = I# (sizeofMutableArray# (unMArray mary)) +{-# INLINE lengthM #-} + +-- | Smart constructor +marray :: MutableArray# s a -> Int -> MArray s a +marray mary _n = MArray mary +{-# INLINE marray #-} + +------------------------------------------------------------------------ + +instance NFData a => NFData (Array a) where + rnf = rnfArray + +rnfArray :: NFData a => Array a -> () +rnfArray ary0 = go ary0 n0 0 + where + n0 = length ary0 + go !ary !n !i + | i >= n = () + | (# x #) <- index# ary i + = rnf x `seq` go ary n (i+1) +-- We use index# just in case GHC can't see that the +-- relevant rnf is strict, or in case it actually isn't. +{-# INLINE rnfArray #-} + +-- | Create a new mutable array of specified size, in the specified +-- state thread, with each element containing the specified initial +-- value. +new :: Int -> a -> ST s (MArray s a) +new n@(I# n#) b = + CHECK_GT("new",n,(0 :: Int)) + ST $ \s -> + case newArray# n# b s of + (# s', ary #) -> (# s', marray ary n #) +{-# INLINE new #-} + +new_ :: Int -> ST s (MArray s a) +new_ n = new n undefinedElem + +singleton :: a -> Array a +singleton x = runST (singletonM x) +{-# INLINE singleton #-} + +singletonM :: a -> ST s (Array a) +singletonM x = new 1 x >>= unsafeFreeze +{-# INLINE singletonM #-} + +pair :: a -> a -> Array a +pair x y = run $ do + ary <- new 2 x + write ary 1 y + return ary +{-# INLINE pair #-} + +read :: MArray s a -> Int -> ST s a +read ary _i@(I# i#) = ST $ \ s -> + CHECK_BOUNDS("read", lengthM ary, _i) + readArray# (unMArray ary) i# s +{-# INLINE read #-} + +write :: MArray s a -> Int -> a -> ST s () +write ary _i@(I# i#) b = ST $ \ s -> + CHECK_BOUNDS("write", lengthM ary, _i) + case writeArray# (unMArray ary) i# b s of + s' -> (# s' , () #) +{-# INLINE write #-} + +index :: Array a -> Int -> a +index ary _i@(I# i#) = + CHECK_BOUNDS("index", length ary, _i) + case indexArray# (unArray ary) i# of (# b #) -> b +{-# INLINE index #-} + +index# :: Array a -> Int -> (# a #) +index# ary _i@(I# i#) = + CHECK_BOUNDS("index#", length ary, _i) + indexArray# (unArray ary) i# +{-# INLINE index# #-} + +indexM :: Array a -> Int -> ST s a +indexM ary _i@(I# i#) = + CHECK_BOUNDS("indexM", length ary, _i) + case indexArray# (unArray ary) i# of (# b #) -> return b +{-# INLINE indexM #-} + +unsafeFreeze :: MArray s a -> ST s (Array a) +unsafeFreeze mary + = ST $ \s -> case unsafeFreezeArray# (unMArray mary) s of + (# s', ary #) -> (# s', array ary (lengthM mary) #) +{-# INLINE unsafeFreeze #-} + +unsafeThaw :: Array a -> ST s (MArray s a) +unsafeThaw ary + = ST $ \s -> case unsafeThawArray# (unArray ary) s of + (# s', mary #) -> (# s', marray mary (length ary) #) +{-# INLINE unsafeThaw #-} + +run :: (forall s . ST s (MArray s e)) -> Array e +run act = runST $ act >>= unsafeFreeze +{-# INLINE run #-} + +run2 :: (forall s. ST s (MArray s e, a)) -> (Array e, a) +run2 k = runST (do + (marr,b) <- k + arr <- unsafeFreeze marr + return (arr,b)) + +-- | Unsafely copy the elements of an array. Array bounds are not checked. +copy :: Array e -> Int -> MArray s e -> Int -> Int -> ST s () +copy !src !_sidx@(I# sidx#) !dst !_didx@(I# didx#) _n@(I# n#) = + CHECK_LE("copy", _sidx + _n, length src) + CHECK_LE("copy", _didx + _n, lengthM dst) + ST $ \ s# -> + case copyArray# (unArray src) sidx# (unMArray dst) didx# n# s# of + s2 -> (# s2, () #) + +-- | Unsafely copy the elements of an array. Array bounds are not checked. +copyM :: MArray s e -> Int -> MArray s e -> Int -> Int -> ST s () +copyM !src !_sidx@(I# sidx#) !dst !_didx@(I# didx#) _n@(I# n#) = + CHECK_BOUNDS("copyM: src", lengthM src, _sidx + _n - 1) + CHECK_BOUNDS("copyM: dst", lengthM dst, _didx + _n - 1) + ST $ \ s# -> + case copyMutableArray# (unMArray src) sidx# (unMArray dst) didx# n# s# of + s2 -> (# s2, () #) + +cloneM :: MArray s a -> Int -> Int -> ST s (MArray s a) +cloneM _mary@(MArray mary#) _off@(I# off#) _len@(I# len#) = + CHECK_BOUNDS("cloneM_off", lengthM _mary, _off - 1) + CHECK_BOUNDS("cloneM_end", lengthM _mary, _off + _len - 1) + ST $ \ s -> + case cloneMutableArray# mary# off# len# s of + (# s', mary'# #) -> (# s', MArray mary'# #) + +-- | Create a new array of the @n@ first elements of @mary@. +trim :: MArray s a -> Int -> ST s (Array a) +trim mary n = cloneM mary 0 n >>= unsafeFreeze +{-# INLINE trim #-} + +-- | /O(n)/ Insert an element at the given position in this array, +-- increasing its size by one. +insert :: Array e -> Int -> e -> Array e +insert ary idx b = runST (insertM ary idx b) +{-# INLINE insert #-} + +-- | /O(n)/ Insert an element at the given position in this array, +-- increasing its size by one. +insertM :: Array e -> Int -> e -> ST s (Array e) +insertM ary idx b = + CHECK_BOUNDS("insertM", count + 1, idx) + do mary <- new_ (count+1) + copy ary 0 mary 0 idx + write mary idx b + copy ary idx mary (idx+1) (count-idx) + unsafeFreeze mary + where !count = length ary +{-# INLINE insertM #-} + +-- | /O(n)/ Update the element at the given position in this array. +update :: Array e -> Int -> e -> Array e +update ary idx b = runST (updateM ary idx b) +{-# INLINE update #-} + +-- | /O(n)/ Update the element at the given position in this array. +updateM :: Array e -> Int -> e -> ST s (Array e) +updateM ary idx b = + CHECK_BOUNDS("updateM", count, idx) + do mary <- thaw ary 0 count + write mary idx b + unsafeFreeze mary + where !count = length ary +{-# INLINE updateM #-} + +-- | /O(n)/ Update the element at the given positio in this array, by +-- applying a function to it. Evaluates the element to WHNF before +-- inserting it into the array. +updateWith' :: Array e -> Int -> (e -> e) -> Array e +updateWith' ary idx f + | (# x #) <- index# ary idx + = update ary idx $! f x +{-# INLINE updateWith' #-} + +-- | /O(1)/ Update the element at the given position in this array, +-- without copying. +unsafeUpdateM :: Array e -> Int -> e -> ST s () +unsafeUpdateM ary idx b = + CHECK_BOUNDS("unsafeUpdateM", length ary, idx) + do mary <- unsafeThaw ary + write mary idx b + _ <- unsafeFreeze mary + return () +{-# INLINE unsafeUpdateM #-} + +foldl' :: (b -> a -> b) -> b -> Array a -> b +foldl' f = \ z0 ary0 -> go ary0 (length ary0) 0 z0 + where + go ary n i !z + | i >= n = z + | otherwise + = case index# ary i of + (# x #) -> go ary n (i+1) (f z x) +{-# INLINE foldl' #-} + +foldr :: (a -> b -> b) -> b -> Array a -> b +foldr f = \ z0 ary0 -> go ary0 (length ary0) 0 z0 + where + go ary n i z + | i >= n = z + | otherwise + = case index# ary i of + (# x #) -> f x (go ary n (i+1) z) +{-# INLINE foldr #-} + +undefinedElem :: a +undefinedElem = error "Data.HashMap.Array: Undefined element" +{-# NOINLINE undefinedElem #-} + +thaw :: Array e -> Int -> Int -> ST s (MArray s e) +thaw !ary !_o@(I# o#) !n@(I# n#) = + CHECK_LE("thaw", _o + n, length ary) + ST $ \ s -> case thawArray# (unArray ary) o# n# s of + (# s2, mary# #) -> (# s2, marray mary# n #) +{-# INLINE thaw #-} + +-- | /O(n)/ Delete an element at the given position in this array, +-- decreasing its size by one. +delete :: Array e -> Int -> Array e +delete ary idx = runST (deleteM ary idx) +{-# INLINE delete #-} + +-- | /O(n)/ Delete an element at the given position in this array, +-- decreasing its size by one. +deleteM :: Array e -> Int -> ST s (Array e) +deleteM ary idx = do + CHECK_BOUNDS("deleteM", count, idx) + do mary <- new_ (count-1) + copy ary 0 mary 0 idx + copy ary (idx+1) mary idx (count-(idx+1)) + unsafeFreeze mary + where !count = length ary +{-# INLINE deleteM #-} + +map :: (a -> b) -> Array a -> Array b +map f = \ ary -> + let !n = length ary + in run $ do + mary <- new_ n + go ary mary 0 n + where + go ary mary i n + | i >= n = return mary + | otherwise = do + x <- indexM ary i + write mary i $ f x + go ary mary (i+1) n +{-# INLINE map #-} + +-- | Strict version of 'map'. +map' :: (a -> b) -> Array a -> Array b +map' f = \ ary -> + let !n = length ary + in run $ do + mary <- new_ n + go ary mary 0 n + where + go ary mary i n + | i >= n = return mary + | otherwise = do + x <- indexM ary i + write mary i $! f x + go ary mary (i+1) n +{-# INLINE map' #-} + +fromList :: Int -> [a] -> Array a +fromList n xs0 = + CHECK_EQ("fromList", n, Prelude.length xs0) + run $ do + mary <- new_ n + go xs0 mary 0 + where + go [] !mary !_ = return mary + go (x:xs) mary i = do write mary i x + go xs mary (i+1) + +toList :: Array a -> [a] +toList = foldr (:) [] + +newtype STA a = STA {_runSTA :: forall s. MutableArray# s a -> ST s (Array a)} + +runSTA :: Int -> STA a -> Array a +runSTA !n (STA m) = runST $ new_ n >>= \ (MArray ar) -> m ar + +traverse :: Applicative f => (a -> f b) -> Array a -> f (Array b) +traverse f = \ !ary -> + let + !len = length ary + go !i + | i == len = pure $ STA $ \mary -> unsafeFreeze (MArray mary) + | (# x #) <- index# ary i + = liftA2 (\b (STA m) -> STA $ \mary -> + write (MArray mary) i b >> m mary) + (f x) (go (i + 1)) + in runSTA len <$> go 0 +{-# INLINE [1] traverse #-} + +-- TODO: Would it be better to just use a lazy traversal +-- and then force the elements of the result? My guess is +-- yes. +traverse' :: Applicative f => (a -> f b) -> Array a -> f (Array b) +traverse' f = \ !ary -> + let + !len = length ary + go !i + | i == len = pure $ STA $ \mary -> unsafeFreeze (MArray mary) + | (# x #) <- index# ary i + = liftA2 (\ !b (STA m) -> STA $ \mary -> + write (MArray mary) i b >> m mary) + (f x) (go (i + 1)) + in runSTA len <$> go 0 +{-# INLINE [1] traverse' #-} + +-- Traversing in ST, we don't need to get fancy; we +-- can just do it directly. +traverseST :: (a -> ST s b) -> Array a -> ST s (Array b) +traverseST f = \ ary0 -> + let + !len = length ary0 + go k !mary + | k == len = return mary + | otherwise = do + x <- indexM ary0 k + y <- f x + write mary k y + go (k + 1) mary + in new_ len >>= (go 0 >=> unsafeFreeze) +{-# INLINE traverseST #-} + +traverseIO :: (a -> IO b) -> Array a -> IO (Array b) +traverseIO f = \ ary0 -> + let + !len = length ary0 + go k !mary + | k == len = return mary + | otherwise = do + x <- stToIO $ indexM ary0 k + y <- f x + stToIO $ write mary k y + go (k + 1) mary + in stToIO (new_ len) >>= (go 0 >=> stToIO . unsafeFreeze) +{-# INLINE traverseIO #-} + + +-- Why don't we have similar RULES for traverse'? The efficient +-- way to traverse strictly in IO or ST is to force results as +-- they come in, which leads to different semantics. In particular, +-- we need to ensure that +-- +-- traverse' (\x -> print x *> pure undefined) xs +-- +-- will actually print all the values and then return undefined. +-- We could add a strict mapMWithIndex, operating in an arbitrary +-- Monad, that supported such rules, but we don't have that right now. +{-# RULES +"traverse/ST" forall f. traverse f = traverseST f +"traverse/IO" forall f. traverse f = traverseIO f + #-} diff --git a/Data/HashMap/Base.hs b/Data/HashMap/Base.hs new file mode 100644 index 0000000..553d2b3 --- /dev/null +++ b/Data/HashMap/Base.hs @@ -0,0 +1,1895 @@ +{-# LANGUAGE BangPatterns, CPP, DeriveDataTypeable, MagicHash #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE RoleAnnotations #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE LambdaCase #-} +#if __GLASGOW_HASKELL__ >= 802 +{-# LANGUAGE TypeInType #-} +{-# LANGUAGE UnboxedSums #-} +#endif +{-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} + +module Data.HashMap.Base + ( + HashMap(..) + , Leaf(..) + + -- * Construction + , empty + , singleton + + -- * Basic interface + , null + , size + , member + , lookup + , lookupDefault + , (!) + , insert + , insertWith + , unsafeInsert + , delete + , adjust + , update + , alter + , alterF + + -- * Combine + -- ** Union + , union + , unionWith + , unionWithKey + , unions + + -- * Transformations + , map + , mapWithKey + , traverseWithKey + + -- * Difference and intersection + , difference + , differenceWith + , intersection + , intersectionWith + , intersectionWithKey + + -- * Folds + , foldl' + , foldlWithKey' + , foldr + , foldrWithKey + + -- * Filter + , mapMaybe + , mapMaybeWithKey + , filter + , filterWithKey + + -- * Conversions + , keys + , elems + + -- ** Lists + , toList + , fromList + , fromListWith + + -- Internals used by the strict version + , Hash + , Bitmap + , bitmapIndexedOrFull + , collision + , hash + , mask + , index + , bitsPerSubkey + , fullNodeMask + , sparseIndex + , two + , unionArrayBy + , update16 + , update16M + , update16With' + , updateOrConcatWith + , updateOrConcatWithKey + , filterMapAux + , equalKeys + , equalKeys1 + , lookupRecordCollision + , LookupRes(..) + , insert' + , delete' + , lookup' + , insertNewKey + , insertKeyExists + , deleteKeyExists + , insertModifying + , ptrEq + , adjust# + ) where + +#if __GLASGOW_HASKELL__ < 710 +import Control.Applicative ((<$>), Applicative(pure)) +import Data.Monoid (Monoid(mempty, mappend)) +import Data.Traversable (Traversable(..)) +import Data.Word (Word) +#endif +#if __GLASGOW_HASKELL__ >= 711 +import Data.Semigroup (Semigroup((<>))) +#endif +import Control.DeepSeq (NFData(rnf)) +import Control.Monad.ST (ST) +import Data.Bits ((.&.), (.|.), complement, popCount) +import Data.Data hiding (Typeable) +import qualified Data.Foldable as Foldable +import qualified Data.List as L +import GHC.Exts ((==#), build, reallyUnsafePtrEquality#) +import Prelude hiding (filter, foldr, lookup, map, null, pred) +import Text.Read hiding (step) + +import qualified Data.HashMap.Array as A +import qualified Data.Hashable as H +import Data.Hashable (Hashable) +import Data.HashMap.Unsafe (runST) +import Data.HashMap.UnsafeShift (unsafeShiftL, unsafeShiftR) +import Data.HashMap.List (isPermutationBy, unorderedCompare) +import Data.Typeable (Typeable) + +import GHC.Exts (isTrue#) +import qualified GHC.Exts as Exts + +#if MIN_VERSION_base(4,9,0) +import Data.Functor.Classes +#endif + +#if MIN_VERSION_hashable(1,2,5) +import qualified Data.Hashable.Lifted as H +#endif + +#if __GLASGOW_HASKELL__ >= 802 +import GHC.Exts (TYPE, Int (..), Int#) +#endif + +#if MIN_VERSION_base(4,8,0) +import Data.Functor.Identity (Identity (..)) +#endif +import Control.Applicative (Const (..)) +import Data.Coerce (coerce) + +-- | A set of values. A set cannot contain duplicate values. +------------------------------------------------------------------------ + +-- | Convenience function. Compute a hash value for the given value. +hash :: H.Hashable a => a -> Hash +hash = fromIntegral . H.hash + +data Leaf k v = L !k v + deriving (Eq) + +instance (NFData k, NFData v) => NFData (Leaf k v) where + rnf (L k v) = rnf k `seq` rnf v + +-- Invariant: The length of the 1st argument to 'Full' is +-- 2^bitsPerSubkey + +-- | A map from keys to values. A map cannot contain duplicate keys; +-- each key can map to at most one value. +data HashMap k v + = Empty + | BitmapIndexed !Bitmap !(A.Array (HashMap k v)) + | Leaf !Hash !(Leaf k v) + | Full !(A.Array (HashMap k v)) + | Collision !Hash !(A.Array (Leaf k v)) + deriving (Typeable) + +type role HashMap nominal representational + +instance (NFData k, NFData v) => NFData (HashMap k v) where + rnf Empty = () + rnf (BitmapIndexed _ ary) = rnf ary + rnf (Leaf _ l) = rnf l + rnf (Full ary) = rnf ary + rnf (Collision _ ary) = rnf ary + +instance Functor (HashMap k) where + fmap = map + +instance Foldable.Foldable (HashMap k) where + foldr f = foldrWithKey (const f) + +#if __GLASGOW_HASKELL__ >= 711 +instance (Eq k, Hashable k) => Semigroup (HashMap k v) where + (<>) = union + {-# INLINE (<>) #-} +#endif + +instance (Eq k, Hashable k) => Monoid (HashMap k v) where + mempty = empty + {-# INLINE mempty #-} +#if __GLASGOW_HASKELL__ >= 711 + mappend = (<>) +#else + mappend = union +#endif + {-# INLINE mappend #-} + +instance (Data k, Data v, Eq k, Hashable k) => Data (HashMap k v) where + gfoldl f z m = z fromList `f` toList m + toConstr _ = fromListConstr + gunfold k z c = case constrIndex c of + 1 -> k (z fromList) + _ -> error "gunfold" + dataTypeOf _ = hashMapDataType + dataCast2 f = gcast2 f + +fromListConstr :: Constr +fromListConstr = mkConstr hashMapDataType "fromList" [] Prefix + +hashMapDataType :: DataType +hashMapDataType = mkDataType "Data.HashMap.Base.HashMap" [fromListConstr] + +type Hash = Word +type Bitmap = Word +type Shift = Int + +#if MIN_VERSION_base(4,9,0) +instance Show2 HashMap where + liftShowsPrec2 spk slk spv slv d m = + showsUnaryWith (liftShowsPrec sp sl) "fromList" d (toList m) + where + sp = liftShowsPrec2 spk slk spv slv + sl = liftShowList2 spk slk spv slv + +instance Show k => Show1 (HashMap k) where + liftShowsPrec = liftShowsPrec2 showsPrec showList + +instance (Eq k, Hashable k, Read k) => Read1 (HashMap k) where + liftReadsPrec rp rl = readsData $ + readsUnaryWith (liftReadsPrec rp' rl') "fromList" fromList + where + rp' = liftReadsPrec rp rl + rl' = liftReadList rp rl +#endif + +instance (Eq k, Hashable k, Read k, Read e) => Read (HashMap k e) where + readPrec = parens $ prec 10 $ do + Ident "fromList" <- lexP + xs <- readPrec + return (fromList xs) + + readListPrec = readListPrecDefault + +instance (Show k, Show v) => Show (HashMap k v) where + showsPrec d m = showParen (d > 10) $ + showString "fromList " . shows (toList m) + +instance Traversable (HashMap k) where + traverse f = traverseWithKey (const f) + {-# INLINABLE traverse #-} + +#if MIN_VERSION_base(4,9,0) +instance Eq2 HashMap where + liftEq2 = equal2 + +instance Eq k => Eq1 (HashMap k) where + liftEq = equal1 +#endif + +instance (Eq k, Eq v) => Eq (HashMap k v) where + (==) = equal1 (==) + +-- We rely on there being no Empty constructors in the tree! +-- This ensures that two equal HashMaps will have the same +-- shape, modulo the order of entries in Collisions. +equal1 :: Eq k + => (v -> v' -> Bool) + -> HashMap k v -> HashMap k v' -> Bool +equal1 eq = go + where + go Empty Empty = True + go (BitmapIndexed bm1 ary1) (BitmapIndexed bm2 ary2) + = bm1 == bm2 && A.sameArray1 go ary1 ary2 + go (Leaf h1 l1) (Leaf h2 l2) = h1 == h2 && leafEq l1 l2 + go (Full ary1) (Full ary2) = A.sameArray1 go ary1 ary2 + go (Collision h1 ary1) (Collision h2 ary2) + = h1 == h2 && isPermutationBy leafEq (A.toList ary1) (A.toList ary2) + go _ _ = False + + leafEq (L k1 v1) (L k2 v2) = k1 == k2 && eq v1 v2 + +equal2 :: (k -> k' -> Bool) -> (v -> v' -> Bool) + -> HashMap k v -> HashMap k' v' -> Bool +equal2 eqk eqv t1 t2 = go (toList' t1 []) (toList' t2 []) + where + -- If the two trees are the same, then their lists of 'Leaf's and + -- 'Collision's read from left to right should be the same (modulo the + -- order of elements in 'Collision'). + + go (Leaf k1 l1 : tl1) (Leaf k2 l2 : tl2) + | k1 == k2 && + leafEq l1 l2 + = go tl1 tl2 + go (Collision k1 ary1 : tl1) (Collision k2 ary2 : tl2) + | k1 == k2 && + A.length ary1 == A.length ary2 && + isPermutationBy leafEq (A.toList ary1) (A.toList ary2) + = go tl1 tl2 + go [] [] = True + go _ _ = False + + leafEq (L k v) (L k' v') = eqk k k' && eqv v v' + +#if MIN_VERSION_base(4,9,0) +instance Ord2 HashMap where + liftCompare2 = cmp + +instance Ord k => Ord1 (HashMap k) where + liftCompare = cmp compare +#endif + +-- | The order is total. +-- +-- /Note:/ Because the hash is not guaranteed to be stable across library +-- versions, OSes, or architectures, neither is an actual order of elements in +-- 'HashMap' or an result of `compare`.is stable. +instance (Ord k, Ord v) => Ord (HashMap k v) where + compare = cmp compare compare + +cmp :: (k -> k' -> Ordering) -> (v -> v' -> Ordering) + -> HashMap k v -> HashMap k' v' -> Ordering +cmp cmpk cmpv t1 t2 = go (toList' t1 []) (toList' t2 []) + where + go (Leaf k1 l1 : tl1) (Leaf k2 l2 : tl2) + = compare k1 k2 `mappend` + leafCompare l1 l2 `mappend` + go tl1 tl2 + go (Collision k1 ary1 : tl1) (Collision k2 ary2 : tl2) + = compare k1 k2 `mappend` + compare (A.length ary1) (A.length ary2) `mappend` + unorderedCompare leafCompare (A.toList ary1) (A.toList ary2) `mappend` + go tl1 tl2 + go (Leaf _ _ : _) (Collision _ _ : _) = LT + go (Collision _ _ : _) (Leaf _ _ : _) = GT + go [] [] = EQ + go [] _ = LT + go _ [] = GT + go _ _ = error "cmp: Should never happend, toList' includes non Leaf / Collision" + + leafCompare (L k v) (L k' v') = cmpk k k' `mappend` cmpv v v' + +-- Same as 'equal' but doesn't compare the values. +equalKeys1 :: (k -> k' -> Bool) -> HashMap k v -> HashMap k' v' -> Bool +equalKeys1 eq t1 t2 = go (toList' t1 []) (toList' t2 []) + where + go (Leaf k1 l1 : tl1) (Leaf k2 l2 : tl2) + | k1 == k2 && leafEq l1 l2 + = go tl1 tl2 + go (Collision k1 ary1 : tl1) (Collision k2 ary2 : tl2) + | k1 == k2 && A.length ary1 == A.length ary2 && + isPermutationBy leafEq (A.toList ary1) (A.toList ary2) + = go tl1 tl2 + go [] [] = True + go _ _ = False + + leafEq (L k _) (L k' _) = eq k k' + +-- Same as 'equal1' but doesn't compare the values. +equalKeys :: Eq k => HashMap k v -> HashMap k v' -> Bool +equalKeys = go + where + go :: Eq k => HashMap k v -> HashMap k v' -> Bool + go Empty Empty = True + go (BitmapIndexed bm1 ary1) (BitmapIndexed bm2 ary2) + = bm1 == bm2 && A.sameArray1 go ary1 ary2 + go (Leaf h1 l1) (Leaf h2 l2) = h1 == h2 && leafEq l1 l2 + go (Full ary1) (Full ary2) = A.sameArray1 go ary1 ary2 + go (Collision h1 ary1) (Collision h2 ary2) + = h1 == h2 && isPermutationBy leafEq (A.toList ary1) (A.toList ary2) + go _ _ = False + + leafEq (L k1 _) (L k2 _) = k1 == k2 + +#if MIN_VERSION_hashable(1,2,5) +instance H.Hashable2 HashMap where + liftHashWithSalt2 hk hv salt hm = go salt (toList' hm []) + where + -- go :: Int -> [HashMap k v] -> Int + go s [] = s + go s (Leaf _ l : tl) + = s `hashLeafWithSalt` l `go` tl + -- For collisions we hashmix hash value + -- and then array of values' hashes sorted + go s (Collision h a : tl) + = (s `H.hashWithSalt` h) `hashCollisionWithSalt` a `go` tl + go s (_ : tl) = s `go` tl + + -- hashLeafWithSalt :: Int -> Leaf k v -> Int + hashLeafWithSalt s (L k v) = (s `hk` k) `hv` v + + -- hashCollisionWithSalt :: Int -> A.Array (Leaf k v) -> Int + hashCollisionWithSalt s + = L.foldl' H.hashWithSalt s . arrayHashesSorted s + + -- arrayHashesSorted :: Int -> A.Array (Leaf k v) -> [Int] + arrayHashesSorted s = L.sort . L.map (hashLeafWithSalt s) . A.toList + +instance (Hashable k) => H.Hashable1 (HashMap k) where + liftHashWithSalt = H.liftHashWithSalt2 H.hashWithSalt +#endif + +instance (Hashable k, Hashable v) => Hashable (HashMap k v) where + hashWithSalt salt hm = go salt (toList' hm []) + where + go :: Int -> [HashMap k v] -> Int + go s [] = s + go s (Leaf _ l : tl) + = s `hashLeafWithSalt` l `go` tl + -- For collisions we hashmix hash value + -- and then array of values' hashes sorted + go s (Collision h a : tl) + = (s `H.hashWithSalt` h) `hashCollisionWithSalt` a `go` tl + go s (_ : tl) = s `go` tl + + hashLeafWithSalt :: Int -> Leaf k v -> Int + hashLeafWithSalt s (L k v) = s `H.hashWithSalt` k `H.hashWithSalt` v + + hashCollisionWithSalt :: Int -> A.Array (Leaf k v) -> Int + hashCollisionWithSalt s + = L.foldl' H.hashWithSalt s . arrayHashesSorted s + + arrayHashesSorted :: Int -> A.Array (Leaf k v) -> [Int] + arrayHashesSorted s = L.sort . L.map (hashLeafWithSalt s) . A.toList + + -- Helper to get 'Leaf's and 'Collision's as a list. +toList' :: HashMap k v -> [HashMap k v] -> [HashMap k v] +toList' (BitmapIndexed _ ary) a = A.foldr toList' a ary +toList' (Full ary) a = A.foldr toList' a ary +toList' l@(Leaf _ _) a = l : a +toList' c@(Collision _ _) a = c : a +toList' Empty a = a + +-- Helper function to detect 'Leaf's and 'Collision's. +isLeafOrCollision :: HashMap k v -> Bool +isLeafOrCollision (Leaf _ _) = True +isLeafOrCollision (Collision _ _) = True +isLeafOrCollision _ = False + +------------------------------------------------------------------------ +-- * Construction + +-- | /O(1)/ Construct an empty map. +empty :: HashMap k v +empty = Empty + +-- | /O(1)/ Construct a map with a single element. +singleton :: (Hashable k) => k -> v -> HashMap k v +singleton k v = Leaf (hash k) (L k v) + +------------------------------------------------------------------------ +-- * Basic interface + +-- | /O(1)/ Return 'True' if this map is empty, 'False' otherwise. +null :: HashMap k v -> Bool +null Empty = True +null _ = False + +-- | /O(n)/ Return the number of key-value mappings in this map. +size :: HashMap k v -> Int +size t = go t 0 + where + go Empty !n = n + go (Leaf _ _) n = n + 1 + go (BitmapIndexed _ ary) n = A.foldl' (flip go) n ary + go (Full ary) n = A.foldl' (flip go) n ary + go (Collision _ ary) n = n + A.length ary + +-- | /O(log n)/ Return 'True' if the specified key is present in the +-- map, 'False' otherwise. +member :: (Eq k, Hashable k) => k -> HashMap k a -> Bool +member k m = case lookup k m of + Nothing -> False + Just _ -> True +{-# INLINABLE member #-} + +-- | /O(log n)/ Return the value to which the specified key is mapped, +-- or 'Nothing' if this map contains no mapping for the key. +lookup :: (Eq k, Hashable k) => k -> HashMap k v -> Maybe v +#if __GLASGOW_HASKELL__ >= 802 +-- GHC does not yet perform a worker-wrapper transformation on +-- unboxed sums automatically. That seems likely to happen at some +-- point (possibly as early as GHC 8.6) but for now we do it manually. +lookup k m = case lookup# k m of + (# (# #) | #) -> Nothing + (# | a #) -> Just a +{-# INLINE lookup #-} + +lookup# :: (Eq k, Hashable k) => k -> HashMap k v -> (# (# #) | v #) +lookup# k m = lookupCont (\_ -> (# (# #) | #)) (\v _i -> (# | v #)) (hash k) k m +{-# INLINABLE lookup# #-} + +#else + +lookup k m = lookupCont (\_ -> Nothing) (\v _i -> Just v) (hash k) k m +{-# INLINABLE lookup #-} +#endif + +-- | lookup' is a version of lookup that takes the hash separately. +-- It is used to implement alterF. +lookup' :: Eq k => Hash -> k -> HashMap k v -> Maybe v +#if __GLASGOW_HASKELL__ >= 802 +-- GHC does not yet perform a worker-wrapper transformation on +-- unboxed sums automatically. That seems likely to happen at some +-- point (possibly as early as GHC 8.6) but for now we do it manually. +-- lookup' would probably prefer to be implemented in terms of its own +-- lookup'#, but it's not important enough and we don't want too much +-- code. +lookup' h k m = case lookupRecordCollision# h k m of + (# (# #) | #) -> Nothing + (# | (# a, _i #) #) -> Just a +{-# INLINE lookup' #-} +#else +lookup' h k m = lookupCont (\_ -> Nothing) (\v _i -> Just v) h k m +{-# INLINABLE lookup' #-} +#endif + +-- The result of a lookup, keeping track of if a hash collision occured. +-- If a collision did not occur then it will have the Int value (-1). +data LookupRes a = Absent | Present a !Int + +-- Internal helper for lookup. This version takes the precomputed hash so +-- that functions that make multiple calls to lookup and related functions +-- (insert, delete) only need to calculate the hash once. +-- +-- It is used by 'alterF' so that hash computation and key comparison only needs +-- to be performed once. With this information you can use the more optimized +-- versions of insert ('insertNewKey', 'insertKeyExists') and delete +-- ('deleteKeyExists') +-- +-- Outcomes: +-- Key not in map => Absent +-- Key in map, no collision => Present v (-1) +-- Key in map, collision => Present v position +lookupRecordCollision :: Eq k => Hash -> k -> HashMap k v -> LookupRes v +#if __GLASGOW_HASKELL__ >= 802 +lookupRecordCollision h k m = case lookupRecordCollision# h k m of + (# (# #) | #) -> Absent + (# | (# a, i #) #) -> Present a (I# i) -- GHC will eliminate the I# +{-# INLINE lookupRecordCollision #-} + +-- Why do we produce an Int# instead of an Int? Unfortunately, GHC is not +-- yet any good at unboxing things *inside* products, let alone sums. That +-- may be changing in GHC 8.6 or so (there is some work in progress), but +-- for now we use Int# explicitly here. We don't need to push the Int# +-- into lookupCont because inlining takes care of that. +lookupRecordCollision# :: Eq k => Hash -> k -> HashMap k v -> (# (# #) | (# v, Int# #) #) +lookupRecordCollision# h k m = + lookupCont (\_ -> (# (# #) | #)) (\v (I# i) -> (# | (# v, i #) #)) h k m +-- INLINABLE to specialize to the Eq instance. +{-# INLINABLE lookupRecordCollision# #-} + +#else /* GHC < 8.2 so there are no unboxed sums */ + +lookupRecordCollision h k m = lookupCont (\_ -> Absent) Present h k m +{-# INLINABLE lookupRecordCollision #-} +#endif + +-- A two-continuation version of lookupRecordCollision. This lets us +-- share source code between lookup and lookupRecordCollision without +-- risking any performance degradation. +-- +-- The absent continuation has type @((# #) -> r)@ instead of just @r@ +-- so we can be representation-polymorphic in the result type. Since +-- this whole thing is always inlined, we don't have to worry about +-- any extra CPS overhead. +lookupCont :: +#if __GLASGOW_HASKELL__ >= 802 + forall rep (r :: TYPE rep) k v. +#else + forall r k v. +#endif + Eq k + => ((# #) -> r) -- Absent continuation + -> (v -> Int -> r) -- Present continuation + -> Hash -- The hash of the key + -> k -> HashMap k v -> r +lookupCont absent present !h0 !k0 !m0 = go h0 k0 0 m0 + where + go :: Eq k => Hash -> k -> Int -> HashMap k v -> r + go !_ !_ !_ Empty = absent (# #) + go h k _ (Leaf hx (L kx x)) + | h == hx && k == kx = present x (-1) + | otherwise = absent (# #) + go h k s (BitmapIndexed b v) + | b .&. m == 0 = absent (# #) + | otherwise = + go h k (s+bitsPerSubkey) (A.index v (sparseIndex b m)) + where m = mask h s + go h k s (Full v) = + go h k (s+bitsPerSubkey) (A.index v (index h s)) + go h k _ (Collision hx v) + | h == hx = lookupInArrayCont absent present k v + | otherwise = absent (# #) +{-# INLINE lookupCont #-} + +-- | /O(log n)/ Return the value to which the specified key is mapped, +-- or the default value if this map contains no mapping for the key. +lookupDefault :: (Eq k, Hashable k) + => v -- ^ Default value to return. + -> k -> HashMap k v -> v +lookupDefault def k t = case lookup k t of + Just v -> v + _ -> def +{-# INLINABLE lookupDefault #-} + +-- | /O(log n)/ Return the value to which the specified key is mapped. +-- Calls 'error' if this map contains no mapping for the key. +(!) :: (Eq k, Hashable k) => HashMap k v -> k -> v +(!) m k = case lookup k m of + Just v -> v + Nothing -> error "Data.HashMap.Base.(!): key not found" +{-# INLINABLE (!) #-} + +infixl 9 ! + +-- | Create a 'Collision' value with two 'Leaf' values. +collision :: Hash -> Leaf k v -> Leaf k v -> HashMap k v +collision h !e1 !e2 = + let v = A.run $ do mary <- A.new 2 e1 + A.write mary 1 e2 + return mary + in Collision h v +{-# INLINE collision #-} + +-- | Create a 'BitmapIndexed' or 'Full' node. +bitmapIndexedOrFull :: Bitmap -> A.Array (HashMap k v) -> HashMap k v +bitmapIndexedOrFull b ary + | b == fullNodeMask = Full ary + | otherwise = BitmapIndexed b ary +{-# INLINE bitmapIndexedOrFull #-} + +-- | /O(log n)/ Associate the specified value with the specified +-- key in this map. If this map previously contained a mapping for +-- the key, the old value is replaced. +insert :: (Eq k, Hashable k) => k -> v -> HashMap k v -> HashMap k v +insert k v m = insert' (hash k) k v m +{-# INLINABLE insert #-} + +insert' :: Eq k => Hash -> k -> v -> HashMap k v -> HashMap k v +insert' h0 k0 v0 m0 = go h0 k0 v0 0 m0 + where + go !h !k x !_ Empty = Leaf h (L k x) + go h k x s t@(Leaf hy l@(L ky y)) + | hy == h = if ky == k + then if x `ptrEq` y + then t + else Leaf h (L k x) + else collision h l (L k x) + | otherwise = runST (two s h k x hy ky y) + go h k x s t@(BitmapIndexed b ary) + | b .&. m == 0 = + let !ary' = A.insert ary i $! Leaf h (L k x) + in bitmapIndexedOrFull (b .|. m) ary' + | otherwise = + let !st = A.index ary i + !st' = go h k x (s+bitsPerSubkey) st + in if st' `ptrEq` st + then t + else BitmapIndexed b (A.update ary i st') + where m = mask h s + i = sparseIndex b m + go h k x s t@(Full ary) = + let !st = A.index ary i + !st' = go h k x (s+bitsPerSubkey) st + in if st' `ptrEq` st + then t + else Full (update16 ary i st') + where i = index h s + go h k x s t@(Collision hy v) + | h == hy = Collision h (updateOrSnocWith const k x v) + | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) +{-# INLINABLE insert' #-} + +-- Insert optimized for the case when we know the key is not in the map. +-- +-- It is only valid to call this when the key does not exist in the map. +-- +-- We can skip: +-- - the key equality check on a Leaf +-- - check for its existence in the array for a hash collision +insertNewKey :: Hash -> k -> v -> HashMap k v -> HashMap k v +insertNewKey !h0 !k0 x0 !m0 = go h0 k0 x0 0 m0 + where + go !h !k x !_ Empty = Leaf h (L k x) + go h k x s (Leaf hy l@(L ky y)) + | hy == h = collision h l (L k x) + | otherwise = runST (two s h k x hy ky y) + go h k x s (BitmapIndexed b ary) + | b .&. m == 0 = + let !ary' = A.insert ary i $! Leaf h (L k x) + in bitmapIndexedOrFull (b .|. m) ary' + | otherwise = + let !st = A.index ary i + !st' = go h k x (s+bitsPerSubkey) st + in BitmapIndexed b (A.update ary i st') + where m = mask h s + i = sparseIndex b m + go h k x s (Full ary) = + let !st = A.index ary i + !st' = go h k x (s+bitsPerSubkey) st + in Full (update16 ary i st') + where i = index h s + go h k x s t@(Collision hy v) + | h == hy = Collision h (snocNewLeaf (L k x) v) + | otherwise = + go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + where + snocNewLeaf :: Leaf k v -> A.Array (Leaf k v) -> A.Array (Leaf k v) + snocNewLeaf leaf ary = A.run $ do + let n = A.length ary + mary <- A.new_ (n + 1) + A.copy ary 0 mary 0 n + A.write mary n leaf + return mary +{-# NOINLINE insertNewKey #-} + + +-- Insert optimized for the case when we know the key is in the map. +-- +-- It is only valid to call this when the key exists in the map and you know the +-- hash collision position if there was one. This information can be obtained +-- from 'lookupRecordCollision'. If there is no collision pass (-1) as collPos +-- (first argument). +-- +-- We can skip the key equality check on a Leaf because we know the leaf must be +-- for this key. +insertKeyExists :: Int -> Hash -> k -> v -> HashMap k v -> HashMap k v +insertKeyExists !collPos0 !h0 !k0 x0 !m0 = go collPos0 h0 k0 x0 0 m0 + where + go !_collPos !h !k x !_s (Leaf _hy _kx) + = Leaf h (L k x) + go collPos h k x s (BitmapIndexed b ary) + | b .&. m == 0 = + let !ary' = A.insert ary i $ Leaf h (L k x) + in bitmapIndexedOrFull (b .|. m) ary' + | otherwise = + let !st = A.index ary i + !st' = go collPos h k x (s+bitsPerSubkey) st + in BitmapIndexed b (A.update ary i st') + where m = mask h s + i = sparseIndex b m + go collPos h k x s (Full ary) = + let !st = A.index ary i + !st' = go collPos h k x (s+bitsPerSubkey) st + in Full (update16 ary i st') + where i = index h s + go collPos h k x _s (Collision _hy v) + | collPos >= 0 = Collision h (setAtPosition collPos k x v) + | otherwise = Empty -- error "Internal error: go {collPos negative}" + go _ _ _ _ _ Empty = Empty -- error "Internal error: go Empty" + +{-# NOINLINE insertKeyExists #-} + +-- Replace the ith Leaf with Leaf k v. +-- +-- This does not check that @i@ is within bounds of the array. +setAtPosition :: Int -> k -> v -> A.Array (Leaf k v) -> A.Array (Leaf k v) +setAtPosition i k x ary = A.update ary i (L k x) +{-# INLINE setAtPosition #-} + + +-- | In-place update version of insert +unsafeInsert :: (Eq k, Hashable k) => k -> v -> HashMap k v -> HashMap k v +unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) + where + h0 = hash k0 + go !h !k x !_ Empty = return $! Leaf h (L k x) + go h k x s t@(Leaf hy l@(L ky y)) + | hy == h = if ky == k + then if x `ptrEq` y + then return t + else return $! Leaf h (L k x) + else return $! collision h l (L k x) + | otherwise = two s h k x hy ky y + go h k x s t@(BitmapIndexed b ary) + | b .&. m == 0 = do + ary' <- A.insertM ary i $! Leaf h (L k x) + return $! bitmapIndexedOrFull (b .|. m) ary' + | otherwise = do + st <- A.indexM ary i + st' <- go h k x (s+bitsPerSubkey) st + A.unsafeUpdateM ary i st' + return t + where m = mask h s + i = sparseIndex b m + go h k x s t@(Full ary) = do + st <- A.indexM ary i + st' <- go h k x (s+bitsPerSubkey) st + A.unsafeUpdateM ary i st' + return t + where i = index h s + go h k x s t@(Collision hy v) + | h == hy = return $! Collision h (updateOrSnocWith const k x v) + | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) +{-# INLINABLE unsafeInsert #-} + +-- | Create a map from two key-value pairs which hashes don't collide. +two :: Shift -> Hash -> k -> v -> Hash -> k -> v -> ST s (HashMap k v) +two = go + where + go s h1 k1 v1 h2 k2 v2 + | bp1 == bp2 = do + st <- go (s+bitsPerSubkey) h1 k1 v1 h2 k2 v2 + ary <- A.singletonM st + return $! BitmapIndexed bp1 ary + | otherwise = do + mary <- A.new 2 $ Leaf h1 (L k1 v1) + A.write mary idx2 $ Leaf h2 (L k2 v2) + ary <- A.unsafeFreeze mary + return $! BitmapIndexed (bp1 .|. bp2) ary + where + bp1 = mask h1 s + bp2 = mask h2 s + idx2 | index h1 s < index h2 s = 1 + | otherwise = 0 +{-# INLINE two #-} + +-- | /O(log n)/ Associate the value with the key in this map. If +-- this map previously contained a mapping for the key, the old value +-- is replaced by the result of applying the given function to the new +-- and old value. Example: +-- +-- > insertWith f k v map +-- > where f new old = new + old +insertWith :: (Eq k, Hashable k) => (v -> v -> v) -> k -> v -> HashMap k v + -> HashMap k v +-- We're not going to worry about allocating a function closure +-- to pass to insertModifying. See comments at 'adjust'. +insertWith f k new m = insertModifying new (\old -> (# f new old #)) k m +{-# INLINE insertWith #-} + +-- | @insertModifying@ is a lot like insertWith; we use it to implement alterF. +-- It takes a value to insert when the key is absent and a function +-- to apply to calculate a new value when the key is present. Thanks +-- to the unboxed unary tuple, we avoid introducing any unnecessary +-- thunks in the tree. +insertModifying :: (Eq k, Hashable k) => v -> (v -> (# v #)) -> k -> HashMap k v + -> HashMap k v +insertModifying x f k0 m0 = go h0 k0 0 m0 + where + !h0 = hash k0 + go !h !k !_ Empty = Leaf h (L k x) + go h k s t@(Leaf hy l@(L ky y)) + | hy == h = if ky == k + then case f y of + (# v' #) | ptrEq y v' -> t + | otherwise -> Leaf h (L k (v')) + else collision h l (L k x) + | otherwise = runST (two s h k x hy ky y) + go h k s t@(BitmapIndexed b ary) + | b .&. m == 0 = + let ary' = A.insert ary i $! Leaf h (L k x) + in bitmapIndexedOrFull (b .|. m) ary' + | otherwise = + let !st = A.index ary i + !st' = go h k (s+bitsPerSubkey) st + ary' = A.update ary i $! st' + in if ptrEq st st' + then t + else BitmapIndexed b ary' + where m = mask h s + i = sparseIndex b m + go h k s t@(Full ary) = + let !st = A.index ary i + !st' = go h k (s+bitsPerSubkey) st + ary' = update16 ary i $! st' + in if ptrEq st st' + then t + else Full ary' + where i = index h s + go h k s t@(Collision hy v) + | h == hy = + let !v' = insertModifyingArr x f k v + in if A.unsafeSameArray v v' + then t + else Collision h v' + | otherwise = go h k s $ BitmapIndexed (mask hy s) (A.singleton t) +{-# INLINABLE insertModifying #-} + +-- Like insertModifying for arrays; used to implement insertModifying +insertModifyingArr :: Eq k => v -> (v -> (# v #)) -> k -> A.Array (Leaf k v) + -> A.Array (Leaf k v) +insertModifyingArr x f k0 ary0 = go k0 ary0 0 (A.length ary0) + where + go !k !ary !i !n + | i >= n = A.run $ do + -- Not found, append to the end. + mary <- A.new_ (n + 1) + A.copy ary 0 mary 0 n + A.write mary n (L k x) + return mary + | otherwise = case A.index ary i of + (L kx y) | k == kx -> case f y of + (# y' #) -> if ptrEq y y' + then ary + else A.update ary i (L k y') + | otherwise -> go k ary (i+1) n +{-# INLINE insertModifyingArr #-} + +-- | In-place update version of insertWith +unsafeInsertWith :: forall k v. (Eq k, Hashable k) + => (v -> v -> v) -> k -> v -> HashMap k v + -> HashMap k v +unsafeInsertWith f k0 v0 m0 = runST (go h0 k0 v0 0 m0) + where + h0 = hash k0 + go :: Hash -> k -> v -> Shift -> HashMap k v -> ST s (HashMap k v) + go !h !k x !_ Empty = return $! Leaf h (L k x) + go h k x s (Leaf hy l@(L ky y)) + | hy == h = if ky == k + then return $! Leaf h (L k (f x y)) + else return $! collision h l (L k x) + | otherwise = two s h k x hy ky y + go h k x s t@(BitmapIndexed b ary) + | b .&. m == 0 = do + ary' <- A.insertM ary i $! Leaf h (L k x) + return $! bitmapIndexedOrFull (b .|. m) ary' + | otherwise = do + st <- A.indexM ary i + st' <- go h k x (s+bitsPerSubkey) st + A.unsafeUpdateM ary i st' + return t + where m = mask h s + i = sparseIndex b m + go h k x s t@(Full ary) = do + st <- A.indexM ary i + st' <- go h k x (s+bitsPerSubkey) st + A.unsafeUpdateM ary i st' + return t + where i = index h s + go h k x s t@(Collision hy v) + | h == hy = return $! Collision h (updateOrSnocWith f k x v) + | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) +{-# INLINABLE unsafeInsertWith #-} + +-- | /O(log n)/ Remove the mapping for the specified key from this map +-- if present. +delete :: (Eq k, Hashable k) => k -> HashMap k v -> HashMap k v +delete k m = delete' (hash k) k m +{-# INLINABLE delete #-} + +delete' :: Eq k => Hash -> k -> HashMap k v -> HashMap k v +delete' h0 k0 m0 = go h0 k0 0 m0 + where + go !_ !_ !_ Empty = Empty + go h k _ t@(Leaf hy (L ky _)) + | hy == h && ky == k = Empty + | otherwise = t + go h k s t@(BitmapIndexed b ary) + | b .&. m == 0 = t + | otherwise = + let !st = A.index ary i + !st' = go h k (s+bitsPerSubkey) st + in if st' `ptrEq` st + then t + else case st' of + Empty | A.length ary == 1 -> Empty + | A.length ary == 2 -> + case (i, A.index ary 0, A.index ary 1) of + (0, _, l) | isLeafOrCollision l -> l + (1, l, _) | isLeafOrCollision l -> l + _ -> bIndexed + | otherwise -> bIndexed + where + bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i) + l | isLeafOrCollision l && A.length ary == 1 -> l + _ -> BitmapIndexed b (A.update ary i st') + where m = mask h s + i = sparseIndex b m + go h k s t@(Full ary) = + let !st = A.index ary i + !st' = go h k (s+bitsPerSubkey) st + in if st' `ptrEq` st + then t + else case st' of + Empty -> + let ary' = A.delete ary i + bm = fullNodeMask .&. complement (1 `unsafeShiftL` i) + in BitmapIndexed bm ary' + _ -> Full (A.update ary i st') + where i = index h s + go h k _ t@(Collision hy v) + | h == hy = case indexOf k v of + Just i + | A.length v == 2 -> + if i == 0 + then Leaf h (A.index v 1) + else Leaf h (A.index v 0) + | otherwise -> Collision h (A.delete v i) + Nothing -> t + | otherwise = t +{-# INLINABLE delete' #-} + +-- | Delete optimized for the case when we know the key is in the map. +-- +-- It is only valid to call this when the key exists in the map and you know the +-- hash collision position if there was one. This information can be obtained +-- from 'lookupRecordCollision'. If there is no collision pass (-1) as collPos. +-- +-- We can skip: +-- - the key equality check on the leaf, if we reach a leaf it must be the key +deleteKeyExists :: Int -> Hash -> k -> HashMap k v -> HashMap k v +deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 0 m0 + where + go :: Int -> Hash -> k -> Int -> HashMap k v -> HashMap k v + go !_collPos !_h !_k !_s (Leaf _ _) = Empty + go collPos h k s (BitmapIndexed b ary) = + let !st = A.index ary i + !st' = go collPos h k (s+bitsPerSubkey) st + in case st' of + Empty | A.length ary == 1 -> Empty + | A.length ary == 2 -> + case (i, A.index ary 0, A.index ary 1) of + (0, _, l) | isLeafOrCollision l -> l + (1, l, _) | isLeafOrCollision l -> l + _ -> bIndexed + | otherwise -> bIndexed + where + bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i) + l | isLeafOrCollision l && A.length ary == 1 -> l + _ -> BitmapIndexed b (A.update ary i st') + where m = mask h s + i = sparseIndex b m + go collPos h k s (Full ary) = + let !st = A.index ary i + !st' = go collPos h k (s+bitsPerSubkey) st + in case st' of + Empty -> + let ary' = A.delete ary i + bm = fullNodeMask .&. complement (1 `unsafeShiftL` i) + in BitmapIndexed bm ary' + _ -> Full (A.update ary i st') + where i = index h s + go collPos h _ _ (Collision _hy v) + | A.length v == 2 + = if collPos == 0 + then Leaf h (A.index v 1) + else Leaf h (A.index v 0) + | otherwise = Collision h (A.delete v collPos) + go !_ !_ !_ !_ Empty = Empty -- error "Internal error: deleteKeyExists empty" +{-# NOINLINE deleteKeyExists #-} + +-- | /O(log n)/ Adjust the value tied to a given key in this map only +-- if it is present. Otherwise, leave the map alone. +adjust :: (Eq k, Hashable k) => (v -> v) -> k -> HashMap k v -> HashMap k v +-- This operation really likes to leak memory, so using this +-- indirect implementation shouldn't hurt much. Furthermore, it allows +-- GHC to avoid a leak when the function is lazy. In particular, +-- +-- adjust (const x) k m +-- ==> adjust# (\v -> (# const x v #)) k m +-- ==> adjust# (\_ -> (# x #)) k m +adjust f k m = adjust# (\v -> (# f v #)) k m +{-# INLINE adjust #-} + +-- | Much like 'adjust', but not inherently leaky. +adjust# :: (Eq k, Hashable k) => (v -> (# v #)) -> k -> HashMap k v -> HashMap k v +adjust# f k0 m0 = go h0 k0 0 m0 + where + h0 = hash k0 + go !_ !_ !_ Empty = Empty + go h k _ t@(Leaf hy (L ky y)) + | hy == h && ky == k = case f y of + (# y' #) | ptrEq y y' -> t + | otherwise -> Leaf h (L k y') + | otherwise = t + go h k s t@(BitmapIndexed b ary) + | b .&. m == 0 = t + | otherwise = let !st = A.index ary i + !st' = go h k (s+bitsPerSubkey) st + ary' = A.update ary i $! st' + in if ptrEq st st' + then t + else BitmapIndexed b ary' + where m = mask h s + i = sparseIndex b m + go h k s t@(Full ary) = + let i = index h s + !st = A.index ary i + !st' = go h k (s+bitsPerSubkey) st + ary' = update16 ary i $! st' + in if ptrEq st st' + then t + else Full ary' + go h k _ t@(Collision hy v) + | h == hy = let !v' = updateWith# f k v + in if A.unsafeSameArray v v' + then t + else Collision h v' + | otherwise = t +{-# INLINABLE adjust# #-} + +-- | /O(log n)/ The expression (@'update' f k map@) updates the value @x@ at @k@, +-- (if it is in the map). If (f k x) is @'Nothing', the element is deleted. +-- If it is (@'Just' y), the key k is bound to the new value y. +update :: (Eq k, Hashable k) => (a -> Maybe a) -> k -> HashMap k a -> HashMap k a +update f = alter (>>= f) +{-# INLINABLE update #-} + + +-- | /O(log n)/ The expression (@'alter' f k map@) alters the value @x@ at @k@, or +-- absence thereof. @alter@ can be used to insert, delete, or update a value in a +-- map. In short : @'lookup' k ('alter' f k m) = f ('lookup' k m)@. +alter :: (Eq k, Hashable k) => (Maybe v -> Maybe v) -> k -> HashMap k v -> HashMap k v +-- TODO(m-renaud): Consider using specialized insert and delete for alter. +alter f k m = + case f (lookup k m) of + Nothing -> delete k m + Just v -> insert k v m +{-# INLINABLE alter #-} + +-- | /O(log n)/ The expression (@'alterF' f k map@) alters the value @x@ at +-- @k@, or absence thereof. @alterF@ can be used to insert, delete, or update +-- a value in a map. +-- +-- Note: 'alterF' is a flipped version of the 'at' combinator from +-- . +-- +-- @since 0.2.9 +alterF :: (Functor f, Eq k, Hashable k) + => (Maybe v -> f (Maybe v)) -> k -> HashMap k v -> f (HashMap k v) +-- We only calculate the hash once, but unless this is rewritten +-- by rules we may test for key equality multiple times. +-- We force the value of the map for consistency with the rewritten +-- version; otherwise someone could tell the difference using a lazy +-- @f@ and a functor that is similar to Const but not actually Const. +alterF f = \ !k !m -> + let + !h = hash k + mv = lookup' h k m + in (<$> f mv) $ \fres -> + case fres of + Nothing -> delete' h k m + Just v' -> insert' h k v' m + +-- We unconditionally rewrite alterF in RULES, but we expose an +-- unfolding just in case it's used in some way that prevents the +-- rule from firing. +{-# INLINABLE [0] alterF #-} + +#if MIN_VERSION_base(4,8,0) +-- This is just a bottom value. See the comment on the "alterFWeird" +-- rule. +test_bottom :: a +test_bottom = error "Data.HashMap.alterF internal error: hit test_bottom" + +-- We use this as an error result in RULES to ensure we don't get +-- any useless CallStack nonsense. +bogus# :: (# #) -> (# a #) +bogus# _ = error "Data.HashMap.alterF internal error: hit bogus#" + +{-# RULES +-- We probe the behavior of @f@ by applying it to Nothing and to +-- Just test_bottom. Based on the results, and how they relate to +-- each other, we choose the best implementation. + +"alterFWeird" forall f. alterF f = + alterFWeird (f Nothing) (f (Just test_bottom)) f + +-- This rule covers situations where alterF is used to simply insert or +-- delete in Identity (most likely via Control.Lens.At). We recognize here +-- (through the repeated @x@ on the LHS) that +-- +-- @f Nothing = f (Just bottom)@, +-- +-- which guarantees that @f@ doesn't care what its argument is, so +-- we don't have to either. +-- +-- Why only Identity? A variant of this rule is actually valid regardless of +-- the functor, but for some functors (e.g., []), it can lead to the +-- same keys being compared multiple times, which is bad if they're +-- ugly things like strings. This is unfortunate, since the rule is likely +-- a good idea for almost all realistic uses, but I don't like nasty +-- edge cases. +"alterFconstant" forall (f :: Maybe a -> Identity (Maybe a)) x. + alterFWeird x x f = \ !k !m -> + Identity (case runIdentity x of {Nothing -> delete k m; Just a -> insert k a m}) + +-- This rule handles the case where 'alterF' is used to do 'insertWith'-like +-- things. Whenever possible, GHC will get rid of the Maybe nonsense for us. +-- We delay this rule to stage 1 so alterFconstant has a chance to fire. +"alterFinsertWith" [1] forall (f :: Maybe a -> Identity (Maybe a)) x y. + alterFWeird (coerce (Just x)) (coerce (Just y)) f = + coerce (insertModifying x (\mold -> case runIdentity (f (Just mold)) of + Nothing -> bogus# (# #) + Just new -> (# new #))) + +-- Handle the case where someone uses 'alterF' instead of 'adjust'. This +-- rule is kind of picky; it will only work if the function doesn't +-- do anything between case matching on the Maybe and producing a result. +"alterFadjust" forall (f :: Maybe a -> Identity (Maybe a)) _y. + alterFWeird (coerce Nothing) (coerce (Just _y)) f = + coerce (adjust# (\x -> case runIdentity (f (Just x)) of + Just x' -> (# x' #) + Nothing -> bogus# (# #))) + +-- The simple specialization to Const; in this case we can look up +-- the key without caring what position it's in. This is only a tiny +-- optimization. +"alterFlookup" forall _ign1 _ign2 (f :: Maybe a -> Const r (Maybe a)). + alterFWeird _ign1 _ign2 f = \ !k !m -> Const (getConst (f (lookup k m))) + #-} + +-- This is a very unsafe version of alterF used for RULES. When calling +-- alterFWeird x y f, the following *must* hold: +-- +-- x = f Nothing +-- y = f (Just _|_) +-- +-- Failure to abide by these laws will make demons come out of your nose. +alterFWeird + :: (Functor f, Eq k, Hashable k) + => f (Maybe v) + -> f (Maybe v) + -> (Maybe v -> f (Maybe v)) -> k -> HashMap k v -> f (HashMap k v) +alterFWeird _ _ f = alterFEager f +{-# INLINE [0] alterFWeird #-} + +-- | This is the default version of alterF that we use in most non-trivial +-- cases. It's called "eager" because it looks up the given key in the map +-- eagerly, whether or not the given function requires that information. +alterFEager :: (Functor f, Eq k, Hashable k) + => (Maybe v -> f (Maybe v)) -> k -> HashMap k v -> f (HashMap k v) +alterFEager f !k m = (<$> f mv) $ \fres -> + case fres of + + ------------------------------ + -- Delete the key from the map. + Nothing -> case lookupRes of + + -- Key did not exist in the map to begin with, no-op + Absent -> m + + -- Key did exist + Present _ collPos -> deleteKeyExists collPos h k m + + ------------------------------ + -- Update value + Just v' -> case lookupRes of + + -- Key did not exist before, insert v' under a new key + Absent -> insertNewKey h k v' m + + -- Key existed before + Present v collPos -> + if v `ptrEq` v' + -- If the value is identical, no-op + then m + -- If the value changed, update the value. + else insertKeyExists collPos h k v' m + + where !h = hash k + !lookupRes = lookupRecordCollision h k m + !mv = case lookupRes of + Absent -> Nothing + Present v _ -> Just v +{-# INLINABLE alterFEager #-} +#endif + + +------------------------------------------------------------------------ +-- * Combine + +-- | /O(n+m)/ The union of two maps. If a key occurs in both maps, the +-- mapping from the first will be the mapping in the result. +union :: (Eq k, Hashable k) => HashMap k v -> HashMap k v -> HashMap k v +union = unionWith const +{-# INLINABLE union #-} + +-- | /O(n+m)/ The union of two maps. If a key occurs in both maps, +-- the provided function (first argument) will be used to compute the +-- result. +unionWith :: (Eq k, Hashable k) => (v -> v -> v) -> HashMap k v -> HashMap k v + -> HashMap k v +unionWith f = unionWithKey (const f) +{-# INLINE unionWith #-} + +-- | /O(n+m)/ The union of two maps. If a key occurs in both maps, +-- the provided function (first argument) will be used to compute the +-- result. +unionWithKey :: (Eq k, Hashable k) => (k -> v -> v -> v) -> HashMap k v -> HashMap k v + -> HashMap k v +unionWithKey f = go 0 + where + -- empty vs. anything + go !_ t1 Empty = t1 + go _ Empty t2 = t2 + -- leaf vs. leaf + go s t1@(Leaf h1 l1@(L k1 v1)) t2@(Leaf h2 l2@(L k2 v2)) + | h1 == h2 = if k1 == k2 + then Leaf h1 (L k1 (f k1 v1 v2)) + else collision h1 l1 l2 + | otherwise = goDifferentHash s h1 h2 t1 t2 + go s t1@(Leaf h1 (L k1 v1)) t2@(Collision h2 ls2) + | h1 == h2 = Collision h1 (updateOrSnocWithKey f k1 v1 ls2) + | otherwise = goDifferentHash s h1 h2 t1 t2 + go s t1@(Collision h1 ls1) t2@(Leaf h2 (L k2 v2)) + | h1 == h2 = Collision h1 (updateOrSnocWithKey (flip . f) k2 v2 ls1) + | otherwise = goDifferentHash s h1 h2 t1 t2 + go s t1@(Collision h1 ls1) t2@(Collision h2 ls2) + | h1 == h2 = Collision h1 (updateOrConcatWithKey f ls1 ls2) + | otherwise = goDifferentHash s h1 h2 t1 t2 + -- branch vs. branch + go s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = + let b' = b1 .|. b2 + ary' = unionArrayBy (go (s+bitsPerSubkey)) b1 b2 ary1 ary2 + in bitmapIndexedOrFull b' ary' + go s (BitmapIndexed b1 ary1) (Full ary2) = + let ary' = unionArrayBy (go (s+bitsPerSubkey)) b1 fullNodeMask ary1 ary2 + in Full ary' + go s (Full ary1) (BitmapIndexed b2 ary2) = + let ary' = unionArrayBy (go (s+bitsPerSubkey)) fullNodeMask b2 ary1 ary2 + in Full ary' + go s (Full ary1) (Full ary2) = + let ary' = unionArrayBy (go (s+bitsPerSubkey)) fullNodeMask fullNodeMask + ary1 ary2 + in Full ary' + -- leaf vs. branch + go s (BitmapIndexed b1 ary1) t2 + | b1 .&. m2 == 0 = let ary' = A.insert ary1 i t2 + b' = b1 .|. m2 + in bitmapIndexedOrFull b' ary' + | otherwise = let ary' = A.updateWith' ary1 i $ \st1 -> + go (s+bitsPerSubkey) st1 t2 + in BitmapIndexed b1 ary' + where + h2 = leafHashCode t2 + m2 = mask h2 s + i = sparseIndex b1 m2 + go s t1 (BitmapIndexed b2 ary2) + | b2 .&. m1 == 0 = let ary' = A.insert ary2 i $! t1 + b' = b2 .|. m1 + in bitmapIndexedOrFull b' ary' + | otherwise = let ary' = A.updateWith' ary2 i $ \st2 -> + go (s+bitsPerSubkey) t1 st2 + in BitmapIndexed b2 ary' + where + h1 = leafHashCode t1 + m1 = mask h1 s + i = sparseIndex b2 m1 + go s (Full ary1) t2 = + let h2 = leafHashCode t2 + i = index h2 s + ary' = update16With' ary1 i $ \st1 -> go (s+bitsPerSubkey) st1 t2 + in Full ary' + go s t1 (Full ary2) = + let h1 = leafHashCode t1 + i = index h1 s + ary' = update16With' ary2 i $ \st2 -> go (s+bitsPerSubkey) t1 st2 + in Full ary' + + leafHashCode (Leaf h _) = h + leafHashCode (Collision h _) = h + leafHashCode _ = error "leafHashCode" + + goDifferentHash s h1 h2 t1 t2 + | m1 == m2 = BitmapIndexed m1 (A.singleton $! go (s+bitsPerSubkey) t1 t2) + | m1 < m2 = BitmapIndexed (m1 .|. m2) (A.pair t1 t2) + | otherwise = BitmapIndexed (m1 .|. m2) (A.pair t2 t1) + where + m1 = mask h1 s + m2 = mask h2 s +{-# INLINE unionWithKey #-} + +-- | Strict in the result of @f@. +unionArrayBy :: (a -> a -> a) -> Bitmap -> Bitmap -> A.Array a -> A.Array a + -> A.Array a +unionArrayBy f b1 b2 ary1 ary2 = A.run $ do + let b' = b1 .|. b2 + mary <- A.new_ (popCount b') + -- iterate over nonzero bits of b1 .|. b2 + -- it would be nice if we could shift m by more than 1 each time + let ba = b1 .&. b2 + go !i !i1 !i2 !m + | m > b' = return () + | b' .&. m == 0 = go i i1 i2 (m `unsafeShiftL` 1) + | ba .&. m /= 0 = do + x1 <- A.indexM ary1 i1 + x2 <- A.indexM ary2 i2 + A.write mary i $! f x1 x2 + go (i+1) (i1+1) (i2+1) (m `unsafeShiftL` 1) + | b1 .&. m /= 0 = do + A.write mary i =<< A.indexM ary1 i1 + go (i+1) (i1+1) (i2 ) (m `unsafeShiftL` 1) + | otherwise = do + A.write mary i =<< A.indexM ary2 i2 + go (i+1) (i1 ) (i2+1) (m `unsafeShiftL` 1) + go 0 0 0 (b' .&. negate b') -- XXX: b' must be non-zero + return mary + -- TODO: For the case where b1 .&. b2 == b1, i.e. when one is a + -- subset of the other, we could use a slightly simpler algorithm, + -- where we copy one array, and then update. +{-# INLINE unionArrayBy #-} + +-- TODO: Figure out the time complexity of 'unions'. + +-- | Construct a set containing all elements from a list of sets. +unions :: (Eq k, Hashable k) => [HashMap k v] -> HashMap k v +unions = L.foldl' union empty +{-# INLINE unions #-} + +------------------------------------------------------------------------ +-- * Transformations + +-- | /O(n)/ Transform this map by applying a function to every value. +mapWithKey :: (k -> v1 -> v2) -> HashMap k v1 -> HashMap k v2 +mapWithKey f = go + where + go Empty = Empty + go (Leaf h (L k v)) = Leaf h $ L k (f k v) + go (BitmapIndexed b ary) = BitmapIndexed b $ A.map go ary + go (Full ary) = Full $ A.map go ary + -- Why map strictly over collision arrays? Because there's no + -- point suspending the O(1) work this does for each leaf. + go (Collision h ary) = Collision h $ + A.map' (\ (L k v) -> L k (f k v)) ary +{-# INLINE mapWithKey #-} + +-- | /O(n)/ Transform this map by applying a function to every value. +map :: (v1 -> v2) -> HashMap k v1 -> HashMap k v2 +map f = mapWithKey (const f) +{-# INLINE map #-} + +-- TODO: We should be able to use mutation to create the new +-- 'HashMap'. + +-- | /O(n)/ Perform an 'Applicative' action for each key-value pair +-- in a 'HashMap' and produce a 'HashMap' of all the results. +-- +-- Note: the order in which the actions occur is unspecified. In particular, +-- when the map contains hash collisions, the order in which the actions +-- associated with the keys involved will depend in an unspecified way on +-- their insertion order. +traverseWithKey + :: Applicative f + => (k -> v1 -> f v2) + -> HashMap k v1 -> f (HashMap k v2) +traverseWithKey f = go + where + go Empty = pure Empty + go (Leaf h (L k v)) = Leaf h . L k <$> f k v + go (BitmapIndexed b ary) = BitmapIndexed b <$> A.traverse go ary + go (Full ary) = Full <$> A.traverse go ary + go (Collision h ary) = + Collision h <$> A.traverse' (\ (L k v) -> L k <$> f k v) ary +{-# INLINE traverseWithKey #-} + +------------------------------------------------------------------------ +-- * Difference and intersection + +-- | /O(n*log m)/ Difference of two maps. Return elements of the first map +-- not existing in the second. +difference :: (Eq k, Hashable k) => HashMap k v -> HashMap k w -> HashMap k v +difference a b = foldlWithKey' go empty a + where + go m k v = case lookup k b of + Nothing -> insert k v m + _ -> m +{-# INLINABLE difference #-} + +-- | /O(n*log m)/ Difference with a combining function. When two equal keys are +-- encountered, the combining function is applied to the values of these keys. +-- If it returns 'Nothing', the element is discarded (proper set difference). If +-- it returns (@'Just' y@), the element is updated with a new value @y@. +differenceWith :: (Eq k, Hashable k) => (v -> w -> Maybe v) -> HashMap k v -> HashMap k w -> HashMap k v +differenceWith f a b = foldlWithKey' go empty a + where + go m k v = case lookup k b of + Nothing -> insert k v m + Just w -> maybe m (\y -> insert k y m) (f v w) +{-# INLINABLE differenceWith #-} + +-- | /O(n*log m)/ Intersection of two maps. Return elements of the first +-- map for keys existing in the second. +intersection :: (Eq k, Hashable k) => HashMap k v -> HashMap k w -> HashMap k v +intersection a b = foldlWithKey' go empty a + where + go m k v = case lookup k b of + Just _ -> insert k v m + _ -> m +{-# INLINABLE intersection #-} + +-- | /O(n+m)/ Intersection of two maps. If a key occurs in both maps +-- the provided function is used to combine the values from the two +-- maps. +intersectionWith :: (Eq k, Hashable k) => (v1 -> v2 -> v3) -> HashMap k v1 + -> HashMap k v2 -> HashMap k v3 +intersectionWith f a b = foldlWithKey' go empty a + where + go m k v = case lookup k b of + Just w -> insert k (f v w) m + _ -> m +{-# INLINABLE intersectionWith #-} + +-- | /O(n+m)/ Intersection of two maps. If a key occurs in both maps +-- the provided function is used to combine the values from the two +-- maps. +intersectionWithKey :: (Eq k, Hashable k) => (k -> v1 -> v2 -> v3) + -> HashMap k v1 -> HashMap k v2 -> HashMap k v3 +intersectionWithKey f a b = foldlWithKey' go empty a + where + go m k v = case lookup k b of + Just w -> insert k (f k v w) m + _ -> m +{-# INLINABLE intersectionWithKey #-} + +------------------------------------------------------------------------ +-- * Folds + +-- | /O(n)/ Reduce this map by applying a binary operator to all +-- elements, using the given starting value (typically the +-- left-identity of the operator). Each application of the operator +-- is evaluated before using the result in the next application. +-- This function is strict in the starting value. +foldl' :: (a -> v -> a) -> a -> HashMap k v -> a +foldl' f = foldlWithKey' (\ z _ v -> f z v) +{-# INLINE foldl' #-} + +-- | /O(n)/ Reduce this map by applying a binary operator to all +-- elements, using the given starting value (typically the +-- left-identity of the operator). Each application of the operator +-- is evaluated before using the result in the next application. +-- This function is strict in the starting value. +foldlWithKey' :: (a -> k -> v -> a) -> a -> HashMap k v -> a +foldlWithKey' f = go + where + go !z Empty = z + go z (Leaf _ (L k v)) = f z k v + go z (BitmapIndexed _ ary) = A.foldl' go z ary + go z (Full ary) = A.foldl' go z ary + go z (Collision _ ary) = A.foldl' (\ z' (L k v) -> f z' k v) z ary +{-# INLINE foldlWithKey' #-} + +-- | /O(n)/ Reduce this map by applying a binary operator to all +-- elements, using the given starting value (typically the +-- right-identity of the operator). +foldr :: (v -> a -> a) -> a -> HashMap k v -> a +foldr f = foldrWithKey (const f) +{-# INLINE foldr #-} + +-- | /O(n)/ Reduce this map by applying a binary operator to all +-- elements, using the given starting value (typically the +-- right-identity of the operator). +foldrWithKey :: (k -> v -> a -> a) -> a -> HashMap k v -> a +foldrWithKey f = go + where + go z Empty = z + go z (Leaf _ (L k v)) = f k v z + go z (BitmapIndexed _ ary) = A.foldr (flip go) z ary + go z (Full ary) = A.foldr (flip go) z ary + go z (Collision _ ary) = A.foldr (\ (L k v) z' -> f k v z') z ary +{-# INLINE foldrWithKey #-} + +------------------------------------------------------------------------ +-- * Filter + +-- | /O(n)/ Transform this map by applying a function to every value +-- and retaining only some of them. +mapMaybeWithKey :: (k -> v1 -> Maybe v2) -> HashMap k v1 -> HashMap k v2 +mapMaybeWithKey f = filterMapAux onLeaf onColl + where onLeaf (Leaf h (L k v)) | Just v' <- f k v = Just (Leaf h (L k v')) + onLeaf _ = Nothing + + onColl (L k v) | Just v' <- f k v = Just (L k v') + | otherwise = Nothing +{-# INLINE mapMaybeWithKey #-} + +-- | /O(n)/ Transform this map by applying a function to every value +-- and retaining only some of them. +mapMaybe :: (v1 -> Maybe v2) -> HashMap k v1 -> HashMap k v2 +mapMaybe f = mapMaybeWithKey (const f) +{-# INLINE mapMaybe #-} + +-- | /O(n)/ Filter this map by retaining only elements satisfying a +-- predicate. +filterWithKey :: forall k v. (k -> v -> Bool) -> HashMap k v -> HashMap k v +filterWithKey pred = filterMapAux onLeaf onColl + where onLeaf t@(Leaf _ (L k v)) | pred k v = Just t + onLeaf _ = Nothing + + onColl el@(L k v) | pred k v = Just el + onColl _ = Nothing +{-# INLINE filterWithKey #-} + + +-- | Common implementation for 'filterWithKey' and 'mapMaybeWithKey', +-- allowing the former to former to reuse terms. +filterMapAux :: forall k v1 v2 + . (HashMap k v1 -> Maybe (HashMap k v2)) + -> (Leaf k v1 -> Maybe (Leaf k v2)) + -> HashMap k v1 + -> HashMap k v2 +filterMapAux onLeaf onColl = go + where + go Empty = Empty + go t@Leaf{} + | Just t' <- onLeaf t = t' + | otherwise = Empty + go (BitmapIndexed b ary) = filterA ary b + go (Full ary) = filterA ary fullNodeMask + go (Collision h ary) = filterC ary h + + filterA ary0 b0 = + let !n = A.length ary0 + in runST $ do + mary <- A.new_ n + step ary0 mary b0 0 0 1 n + where + step :: A.Array (HashMap k v1) -> A.MArray s (HashMap k v2) + -> Bitmap -> Int -> Int -> Bitmap -> Int + -> ST s (HashMap k v2) + step !ary !mary !b i !j !bi n + | i >= n = case j of + 0 -> return Empty + 1 -> do + ch <- A.read mary 0 + case ch of + t | isLeafOrCollision t -> return t + _ -> BitmapIndexed b <$> A.trim mary 1 + _ -> do + ary2 <- A.trim mary j + return $! if j == maxChildren + then Full ary2 + else BitmapIndexed b ary2 + | bi .&. b == 0 = step ary mary b i j (bi `unsafeShiftL` 1) n + | otherwise = case go (A.index ary i) of + Empty -> step ary mary (b .&. complement bi) (i+1) j + (bi `unsafeShiftL` 1) n + t -> do A.write mary j t + step ary mary b (i+1) (j+1) (bi `unsafeShiftL` 1) n + + filterC ary0 h = + let !n = A.length ary0 + in runST $ do + mary <- A.new_ n + step ary0 mary 0 0 n + where + step :: A.Array (Leaf k v1) -> A.MArray s (Leaf k v2) + -> Int -> Int -> Int + -> ST s (HashMap k v2) + step !ary !mary i !j n + | i >= n = case j of + 0 -> return Empty + 1 -> do l <- A.read mary 0 + return $! Leaf h l + _ | i == j -> do ary2 <- A.unsafeFreeze mary + return $! Collision h ary2 + | otherwise -> do ary2 <- A.trim mary j + return $! Collision h ary2 + | Just el <- onColl $! A.index ary i + = A.write mary j el >> step ary mary (i+1) (j+1) n + | otherwise = step ary mary (i+1) j n +{-# INLINE filterMapAux #-} + +-- | /O(n)/ Filter this map by retaining only elements which values +-- satisfy a predicate. +filter :: (v -> Bool) -> HashMap k v -> HashMap k v +filter p = filterWithKey (\_ v -> p v) +{-# INLINE filter #-} + +------------------------------------------------------------------------ +-- * Conversions + +-- TODO: Improve fusion rules by modelled them after the Prelude ones +-- on lists. + +-- | /O(n)/ Return a list of this map's keys. The list is produced +-- lazily. +keys :: HashMap k v -> [k] +keys = L.map fst . toList +{-# INLINE keys #-} + +-- | /O(n)/ Return a list of this map's values. The list is produced +-- lazily. +elems :: HashMap k v -> [v] +elems = L.map snd . toList +{-# INLINE elems #-} + +------------------------------------------------------------------------ +-- ** Lists + +-- | /O(n)/ Return a list of this map's elements. The list is +-- produced lazily. The order of its elements is unspecified. +toList :: HashMap k v -> [(k, v)] +toList t = build (\ c z -> foldrWithKey (curry c) z t) +{-# INLINE toList #-} + +-- | /O(n)/ Construct a map with the supplied mappings. If the list +-- contains duplicate mappings, the later mappings take precedence. +fromList :: (Eq k, Hashable k) => [(k, v)] -> HashMap k v +fromList = L.foldl' (\ m (k, v) -> unsafeInsert k v m) empty +{-# INLINABLE fromList #-} + +-- | /O(n*log n)/ Construct a map from a list of elements. Uses +-- the provided function to merge duplicate entries. +fromListWith :: (Eq k, Hashable k) => (v -> v -> v) -> [(k, v)] -> HashMap k v +fromListWith f = L.foldl' (\ m (k, v) -> unsafeInsertWith f k v m) empty +{-# INLINE fromListWith #-} + +------------------------------------------------------------------------ +-- Array operations + +-- | /O(n)/ Look up the value associated with the given key in an +-- array. +lookupInArrayCont :: +#if __GLASGOW_HASKELL__ >= 802 + forall rep (r :: TYPE rep) k v. +#else + forall r k v. +#endif + Eq k => ((# #) -> r) -> (v -> Int -> r) -> k -> A.Array (Leaf k v) -> r +lookupInArrayCont absent present k0 ary0 = go k0 ary0 0 (A.length ary0) + where + go :: Eq k => k -> A.Array (Leaf k v) -> Int -> Int -> r + go !k !ary !i !n + | i >= n = absent (# #) + | otherwise = case A.index ary i of + (L kx v) + | k == kx -> present v i + | otherwise -> go k ary (i+1) n +{-# INLINE lookupInArrayCont #-} + +-- | /O(n)/ Lookup the value associated with the given key in this +-- array. Returns 'Nothing' if the key wasn't found. +indexOf :: Eq k => k -> A.Array (Leaf k v) -> Maybe Int +indexOf k0 ary0 = go k0 ary0 0 (A.length ary0) + where + go !k !ary !i !n + | i >= n = Nothing + | otherwise = case A.index ary i of + (L kx _) + | k == kx -> Just i + | otherwise -> go k ary (i+1) n +{-# INLINABLE indexOf #-} + +updateWith# :: Eq k => (v -> (# v #)) -> k -> A.Array (Leaf k v) -> A.Array (Leaf k v) +updateWith# f k0 ary0 = go k0 ary0 0 (A.length ary0) + where + go !k !ary !i !n + | i >= n = ary + | otherwise = case A.index ary i of + (L kx y) | k == kx -> case f y of + (# y' #) + | ptrEq y y' -> ary + | otherwise -> A.update ary i (L k y') + | otherwise -> go k ary (i+1) n +{-# INLINABLE updateWith# #-} + +updateOrSnocWith :: Eq k => (v -> v -> v) -> k -> v -> A.Array (Leaf k v) + -> A.Array (Leaf k v) +updateOrSnocWith f = updateOrSnocWithKey (const f) +{-# INLINABLE updateOrSnocWith #-} + +updateOrSnocWithKey :: Eq k => (k -> v -> v -> v) -> k -> v -> A.Array (Leaf k v) + -> A.Array (Leaf k v) +updateOrSnocWithKey f k0 v0 ary0 = go k0 v0 ary0 0 (A.length ary0) + where + go !k v !ary !i !n + | i >= n = A.run $ do + -- Not found, append to the end. + mary <- A.new_ (n + 1) + A.copy ary 0 mary 0 n + A.write mary n (L k v) + return mary + | otherwise = case A.index ary i of + (L kx y) | k == kx -> A.update ary i (L k (f k v y)) + | otherwise -> go k v ary (i+1) n +{-# INLINABLE updateOrSnocWithKey #-} + +updateOrConcatWith :: Eq k => (v -> v -> v) -> A.Array (Leaf k v) -> A.Array (Leaf k v) -> A.Array (Leaf k v) +updateOrConcatWith f = updateOrConcatWithKey (const f) +{-# INLINABLE updateOrConcatWith #-} + +updateOrConcatWithKey :: Eq k => (k -> v -> v -> v) -> A.Array (Leaf k v) -> A.Array (Leaf k v) -> A.Array (Leaf k v) +updateOrConcatWithKey f ary1 ary2 = A.run $ do + -- TODO: instead of mapping and then folding, should we traverse? + -- We'll have to be careful to avoid allocating pairs or similar. + + -- first: look up the position of each element of ary2 in ary1 + let indices = A.map' (\(L k _) -> indexOf k ary1) ary2 + -- that tells us how large the overlap is: + -- count number of Nothing constructors + let nOnly2 = A.foldl' (\n -> maybe (n+1) (const n)) 0 indices + let n1 = A.length ary1 + let n2 = A.length ary2 + -- copy over all elements from ary1 + mary <- A.new_ (n1 + nOnly2) + A.copy ary1 0 mary 0 n1 + -- append or update all elements from ary2 + let go !iEnd !i2 + | i2 >= n2 = return () + | otherwise = case A.index indices i2 of + Just i1 -> do -- key occurs in both arrays, store combination in position i1 + L k v1 <- A.indexM ary1 i1 + L _ v2 <- A.indexM ary2 i2 + A.write mary i1 (L k (f k v1 v2)) + go iEnd (i2+1) + Nothing -> do -- key is only in ary2, append to end + A.write mary iEnd =<< A.indexM ary2 i2 + go (iEnd+1) (i2+1) + go n1 0 + return mary +{-# INLINABLE updateOrConcatWithKey #-} + +------------------------------------------------------------------------ +-- Manually unrolled loops + +-- | /O(n)/ Update the element at the given position in this array. +update16 :: A.Array e -> Int -> e -> A.Array e +update16 ary idx b = runST (update16M ary idx b) +{-# INLINE update16 #-} + +-- | /O(n)/ Update the element at the given position in this array. +update16M :: A.Array e -> Int -> e -> ST s (A.Array e) +update16M ary idx b = do + mary <- clone16 ary + A.write mary idx b + A.unsafeFreeze mary +{-# INLINE update16M #-} + +-- | /O(n)/ Update the element at the given position in this array, by applying a function to it. +update16With' :: A.Array e -> Int -> (e -> e) -> A.Array e +update16With' ary idx f + | (# x #) <- A.index# ary idx + = update16 ary idx $! f x +{-# INLINE update16With' #-} + +-- | Unsafely clone an array of 16 elements. The length of the input +-- array is not checked. +clone16 :: A.Array e -> ST s (A.MArray s e) +clone16 ary = + A.thaw ary 0 16 + +------------------------------------------------------------------------ +-- Bit twiddling + +bitsPerSubkey :: Int +bitsPerSubkey = 4 + +maxChildren :: Int +maxChildren = fromIntegral $ 1 `unsafeShiftL` bitsPerSubkey + +subkeyMask :: Bitmap +subkeyMask = 1 `unsafeShiftL` bitsPerSubkey - 1 + +sparseIndex :: Bitmap -> Bitmap -> Int +sparseIndex b m = popCount (b .&. (m - 1)) + +mask :: Word -> Shift -> Bitmap +mask w s = 1 `unsafeShiftL` index w s +{-# INLINE mask #-} + +-- | Mask out the 'bitsPerSubkey' bits used for indexing at this level +-- of the tree. +index :: Hash -> Shift -> Int +index w s = fromIntegral $ (unsafeShiftR w s) .&. subkeyMask +{-# INLINE index #-} + +-- | A bitmask with the 'bitsPerSubkey' least significant bits set. +fullNodeMask :: Bitmap +fullNodeMask = complement (complement 0 `unsafeShiftL` maxChildren) +{-# INLINE fullNodeMask #-} + +-- | Check if two the two arguments are the same value. N.B. This +-- function might give false negatives (due to GC moving objects.) +ptrEq :: a -> a -> Bool +ptrEq x y = isTrue# (reallyUnsafePtrEquality# x y ==# 1#) +{-# INLINE ptrEq #-} + +------------------------------------------------------------------------ +-- IsList instance +instance (Eq k, Hashable k) => Exts.IsList (HashMap k v) where + type Item (HashMap k v) = (k, v) + fromList = fromList + toList = toList diff --git a/Data/HashMap/Lazy.hs b/Data/HashMap/Lazy.hs new file mode 100644 index 0000000..c730635 --- /dev/null +++ b/Data/HashMap/Lazy.hs @@ -0,0 +1,103 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE Trustworthy #-} + +------------------------------------------------------------------------ +-- | +-- Module : Data.HashMap.Lazy +-- Copyright : 2010-2012 Johan Tibell +-- License : BSD-style +-- Maintainer : johan.tibell@gmail.com +-- Stability : provisional +-- Portability : portable +-- +-- A map from /hashable/ keys to values. A map cannot contain +-- duplicate keys; each key can map to at most one value. A 'HashMap' +-- makes no guarantees as to the order of its elements. +-- +-- The implementation is based on /hash array mapped tries/. A +-- 'HashMap' is often faster than other tree-based set types, +-- especially when key comparison is expensive, as in the case of +-- strings. +-- +-- Many operations have a average-case complexity of /O(log n)/. The +-- implementation uses a large base (i.e. 16) so in practice these +-- operations are constant time. +module Data.HashMap.Lazy + ( + -- * Strictness properties + -- $strictness + + HashMap + + -- * Construction + , empty + , singleton + + -- * Basic interface + , null + , size + , member + , lookup + , lookupDefault + , (!) + , insert + , insertWith + , delete + , adjust + , update + , alter + , alterF + + -- * Combine + -- ** Union + , union + , unionWith + , unionWithKey + , unions + + -- * Transformations + , map + , mapWithKey + , traverseWithKey + + -- * Difference and intersection + , difference + , differenceWith + , intersection + , intersectionWith + , intersectionWithKey + + -- * Folds + , foldl' + , foldlWithKey' + , foldr + , foldrWithKey + + -- * Filter + , filter + , filterWithKey + , mapMaybe + , mapMaybeWithKey + + -- * Conversions + , keys + , elems + + -- ** Lists + , toList + , fromList + , fromListWith + + -- ** HashSets + , HS.keysSet + ) where + +import Data.HashMap.Base as HM +import qualified Data.HashSet.Base as HS +import Prelude () + +-- $strictness +-- +-- This module satisfies the following strictness property: +-- +-- * Key arguments are evaluated to WHNF diff --git a/Data/HashMap/List.hs b/Data/HashMap/List.hs new file mode 100644 index 0000000..c0f55aa --- /dev/null +++ b/Data/HashMap/List.hs @@ -0,0 +1,66 @@ +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} +-- | Extra list functions +-- +-- In separate module to aid testing. +module Data.HashMap.List + ( isPermutationBy + , deleteBy + , unorderedCompare + ) where + +import Data.Maybe (fromMaybe) +import Data.List (sortBy) +import Data.Monoid +import Prelude + +-- Note: previous implemenation isPermutation = null (as // bs) +-- was O(n^2) too. +-- +-- This assumes lists are of equal length +isPermutationBy :: (a -> b -> Bool) -> [a] -> [b] -> Bool +isPermutationBy f = go + where + f' = flip f + + go [] [] = True + go (x : xs) (y : ys) + | f x y = go xs ys + | otherwise = fromMaybe False $ do + xs' <- deleteBy f' y xs + ys' <- deleteBy f x ys + return (go xs' ys') + go [] (_ : _) = False + go (_ : _) [] = False + +-- The idea: +-- +-- Homogeonous version +-- +-- uc :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering +-- uc c as bs = compare (sortBy c as) (sortBy c bs) +-- +-- But as we have only (a -> b -> Ordering), we cannot directly compare +-- elements from the same list. +-- +-- So when comparing elements from the list, we count how many elements are +-- "less and greater" in the other list, and use the count as a metric. +-- +unorderedCompare :: (a -> b -> Ordering) -> [a] -> [b] -> Ordering +unorderedCompare c as bs = go (sortBy cmpA as) (sortBy cmpB bs) + where + go [] [] = EQ + go [] (_ : _) = LT + go (_ : _) [] = GT + go (x : xs) (y : ys) = c x y `mappend` go xs ys + + cmpA a a' = compare (inB a) (inB a') + cmpB b b' = compare (inA b) (inA b') + + inB a = (length $ filter (\b -> c a b == GT) bs, negate $ length $ filter (\b -> c a b == LT) bs) + inA b = (length $ filter (\a -> c a b == LT) as, negate $ length $ filter (\a -> c a b == GT) as) + +-- Returns Nothing is nothing deleted +deleteBy :: (a -> b -> Bool) -> a -> [b] -> Maybe [b] +deleteBy _ _ [] = Nothing +deleteBy eq x (y:ys) = if x `eq` y then Just ys else fmap (y :) (deleteBy eq x ys) diff --git a/Data/HashMap/Strict.hs b/Data/HashMap/Strict.hs new file mode 100644 index 0000000..3af68b7 --- /dev/null +++ b/Data/HashMap/Strict.hs @@ -0,0 +1,96 @@ +{-# LANGUAGE Safe #-} + +------------------------------------------------------------------------ +-- | +-- Module : Data.HashMap.Strict +-- Copyright : 2010-2012 Johan Tibell +-- License : BSD-style +-- Maintainer : johan.tibell@gmail.com +-- Stability : provisional +-- Portability : portable +-- +-- A map from /hashable/ keys to values. A map cannot contain +-- duplicate keys; each key can map to at most one value. A 'HashMap' +-- makes no guarantees as to the order of its elements. +-- +-- The implementation is based on /hash array mapped tries/. A +-- 'HashMap' is often faster than other tree-based set types, +-- especially when key comparison is expensive, as in the case of +-- strings. +-- +-- Many operations have a average-case complexity of /O(log n)/. The +-- implementation uses a large base (i.e. 16) so in practice these +-- operations are constant time. +module Data.HashMap.Strict + ( + -- * Strictness properties + -- $strictness + + HashMap + + -- * Construction + , empty + , singleton + + -- * Basic interface + , null + , size + , member + , lookup + , lookupDefault + , (!) + , insert + , insertWith + , delete + , adjust + , update + , alter + , alterF + + -- * Combine + -- ** Union + , union + , unionWith + , unionWithKey + , unions + + -- * Transformations + , map + , mapWithKey + , traverseWithKey + + -- * Difference and intersection + , difference + , differenceWith + , intersection + , intersectionWith + , intersectionWithKey + + -- * Folds + , foldl' + , foldlWithKey' + , foldr + , foldrWithKey + + -- * Filter + , filter + , filterWithKey + , mapMaybe + , mapMaybeWithKey + + -- * Conversions + , keys + , elems + + -- ** Lists + , toList + , fromList + , fromListWith + + -- ** HashSets + , HS.keysSet + ) where + +import Data.HashMap.Strict.Base as HM +import qualified Data.HashSet.Base as HS +import Prelude () diff --git a/Data/HashMap/Strict/Base.hs b/Data/HashMap/Strict/Base.hs new file mode 100644 index 0000000..890d18f --- /dev/null +++ b/Data/HashMap/Strict/Base.hs @@ -0,0 +1,671 @@ +{-# LANGUAGE BangPatterns, CPP, PatternGuards, MagicHash, UnboxedTuples #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE Trustworthy #-} + +------------------------------------------------------------------------ +-- | +-- Module : Data.HashMap.Strict +-- Copyright : 2010-2012 Johan Tibell +-- License : BSD-style +-- Maintainer : johan.tibell@gmail.com +-- Stability : provisional +-- Portability : portable +-- +-- A map from /hashable/ keys to values. A map cannot contain +-- duplicate keys; each key can map to at most one value. A 'HashMap' +-- makes no guarantees as to the order of its elements. +-- +-- The implementation is based on /hash array mapped tries/. A +-- 'HashMap' is often faster than other tree-based set types, +-- especially when key comparison is expensive, as in the case of +-- strings. +-- +-- Many operations have a average-case complexity of /O(log n)/. The +-- implementation uses a large base (i.e. 16) so in practice these +-- operations are constant time. +module Data.HashMap.Strict.Base + ( + -- * Strictness properties + -- $strictness + + HashMap + + -- * Construction + , empty + , singleton + + -- * Basic interface + , HM.null + , size + , HM.member + , HM.lookup + , lookupDefault + , (!) + , insert + , insertWith + , delete + , adjust + , update + , alter + , alterF + + -- * Combine + -- ** Union + , union + , unionWith + , unionWithKey + , unions + + -- * Transformations + , map + , mapWithKey + , traverseWithKey + + -- * Difference and intersection + , difference + , differenceWith + , intersection + , intersectionWith + , intersectionWithKey + + -- * Folds + , foldl' + , foldlWithKey' + , HM.foldr + , foldrWithKey + + -- * Filter + , HM.filter + , filterWithKey + , mapMaybe + , mapMaybeWithKey + + -- * Conversions + , keys + , elems + + -- ** Lists + , toList + , fromList + , fromListWith + ) where + +import Data.Bits ((.&.), (.|.)) + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative (Applicative (..), (<$>)) +#endif +import qualified Data.List as L +import Data.Hashable (Hashable) +import Prelude hiding (map, lookup) + +import qualified Data.HashMap.Array as A +import qualified Data.HashMap.Base as HM +import Data.HashMap.Base hiding ( + alter, alterF, adjust, fromList, fromListWith, insert, insertWith, + differenceWith, intersectionWith, intersectionWithKey, map, mapWithKey, + mapMaybe, mapMaybeWithKey, singleton, update, unionWith, unionWithKey, + traverseWithKey) +import Data.HashMap.Unsafe (runST) +#if MIN_VERSION_base(4,8,0) +import Data.Functor.Identity +#endif +import Control.Applicative (Const (..)) +import Data.Coerce + +-- $strictness +-- +-- This module satisfies the following strictness properties: +-- +-- 1. Key arguments are evaluated to WHNF; +-- +-- 2. Keys and values are evaluated to WHNF before they are stored in +-- the map. + +------------------------------------------------------------------------ +-- * Construction + +-- | /O(1)/ Construct a map with a single element. +singleton :: (Hashable k) => k -> v -> HashMap k v +singleton k !v = HM.singleton k v + +------------------------------------------------------------------------ +-- * Basic interface + +-- | /O(log n)/ Associate the specified value with the specified +-- key in this map. If this map previously contained a mapping for +-- the key, the old value is replaced. +insert :: (Eq k, Hashable k) => k -> v -> HashMap k v -> HashMap k v +insert k !v = HM.insert k v +{-# INLINABLE insert #-} + +-- | /O(log n)/ Associate the value with the key in this map. If +-- this map previously contained a mapping for the key, the old value +-- is replaced by the result of applying the given function to the new +-- and old value. Example: +-- +-- > insertWith f k v map +-- > where f new old = new + old +insertWith :: (Eq k, Hashable k) => (v -> v -> v) -> k -> v -> HashMap k v + -> HashMap k v +insertWith f k0 v0 m0 = go h0 k0 v0 0 m0 + where + h0 = hash k0 + go !h !k x !_ Empty = leaf h k x + go h k x s (Leaf hy l@(L ky y)) + | hy == h = if ky == k + then leaf h k (f x y) + else x `seq` (collision h l (L k x)) + | otherwise = x `seq` runST (two s h k x hy ky y) + go h k x s (BitmapIndexed b ary) + | b .&. m == 0 = + let ary' = A.insert ary i $! leaf h k x + in bitmapIndexedOrFull (b .|. m) ary' + | otherwise = + let st = A.index ary i + st' = go h k x (s+bitsPerSubkey) st + ary' = A.update ary i $! st' + in BitmapIndexed b ary' + where m = mask h s + i = sparseIndex b m + go h k x s (Full ary) = + let st = A.index ary i + st' = go h k x (s+bitsPerSubkey) st + ary' = update16 ary i $! st' + in Full ary' + where i = index h s + go h k x s t@(Collision hy v) + | h == hy = Collision h (updateOrSnocWith f k x v) + | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) +{-# INLINABLE insertWith #-} + +-- | In-place update version of insertWith +unsafeInsertWith :: (Eq k, Hashable k) => (v -> v -> v) -> k -> v -> HashMap k v + -> HashMap k v +unsafeInsertWith f k0 v0 m0 = runST (go h0 k0 v0 0 m0) + where + h0 = hash k0 + go !h !k x !_ Empty = return $! leaf h k x + go h k x s (Leaf hy l@(L ky y)) + | hy == h = if ky == k + then return $! leaf h k (f x y) + else do + let l' = x `seq` (L k x) + return $! collision h l l' + | otherwise = x `seq` two s h k x hy ky y + go h k x s t@(BitmapIndexed b ary) + | b .&. m == 0 = do + ary' <- A.insertM ary i $! leaf h k x + return $! bitmapIndexedOrFull (b .|. m) ary' + | otherwise = do + st <- A.indexM ary i + st' <- go h k x (s+bitsPerSubkey) st + A.unsafeUpdateM ary i st' + return t + where m = mask h s + i = sparseIndex b m + go h k x s t@(Full ary) = do + st <- A.indexM ary i + st' <- go h k x (s+bitsPerSubkey) st + A.unsafeUpdateM ary i st' + return t + where i = index h s + go h k x s t@(Collision hy v) + | h == hy = return $! Collision h (updateOrSnocWith f k x v) + | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) +{-# INLINABLE unsafeInsertWith #-} + +-- | /O(log n)/ Adjust the value tied to a given key in this map only +-- if it is present. Otherwise, leave the map alone. +adjust :: (Eq k, Hashable k) => (v -> v) -> k -> HashMap k v -> HashMap k v +adjust f k0 m0 = go h0 k0 0 m0 + where + h0 = hash k0 + go !_ !_ !_ Empty = Empty + go h k _ t@(Leaf hy (L ky y)) + | hy == h && ky == k = leaf h k (f y) + | otherwise = t + go h k s t@(BitmapIndexed b ary) + | b .&. m == 0 = t + | otherwise = let st = A.index ary i + st' = go h k (s+bitsPerSubkey) st + ary' = A.update ary i $! st' + in BitmapIndexed b ary' + where m = mask h s + i = sparseIndex b m + go h k s (Full ary) = + let i = index h s + st = A.index ary i + st' = go h k (s+bitsPerSubkey) st + ary' = update16 ary i $! st' + in Full ary' + go h k _ t@(Collision hy v) + | h == hy = Collision h (updateWith f k v) + | otherwise = t +{-# INLINABLE adjust #-} + +-- | /O(log n)/ The expression (@'update' f k map@) updates the value @x@ at @k@, +-- (if it is in the map). If (f k x) is @'Nothing', the element is deleted. +-- If it is (@'Just' y), the key k is bound to the new value y. +update :: (Eq k, Hashable k) => (a -> Maybe a) -> k -> HashMap k a -> HashMap k a +update f = alter (>>= f) +{-# INLINABLE update #-} + +-- | /O(log n)/ The expression (@'alter' f k map@) alters the value @x@ at @k@, or +-- absence thereof. @alter@ can be used to insert, delete, or update a value in a +-- map. In short : @'lookup' k ('alter' f k m) = f ('lookup' k m)@. +alter :: (Eq k, Hashable k) => (Maybe v -> Maybe v) -> k -> HashMap k v -> HashMap k v +alter f k m = + case f (HM.lookup k m) of + Nothing -> delete k m + Just v -> insert k v m +{-# INLINABLE alter #-} + +-- | /O(log n)/ The expression (@'alterF' f k map@) alters the value @x@ at +-- @k@, or absence thereof. @alterF@ can be used to insert, delete, or update +-- a value in a map. +-- +-- Note: 'alterF' is a flipped version of the 'at' combinator from +-- . +-- +-- @since 0.2.9 +alterF :: (Functor f, Eq k, Hashable k) + => (Maybe v -> f (Maybe v)) -> k -> HashMap k v -> f (HashMap k v) +-- Special care is taken to only calculate the hash once. When we rewrite +-- with RULES, we also ensure that we only compare the key for equality +-- once. We force the value of the map for consistency with the rewritten +-- version; otherwise someone could tell the difference using a lazy +-- @f@ and a functor that is similar to Const but not actually Const. +alterF f = \ !k !m -> + let !h = hash k + mv = lookup' h k m + in (<$> f mv) $ \fres -> + case fres of + Nothing -> delete' h k m + Just !v' -> insert' h k v' m + +-- We rewrite this function unconditionally in RULES, but we expose +-- an unfolding just in case it's used in a context where the rules +-- don't fire. +{-# INLINABLE [0] alterF #-} + +#if MIN_VERSION_base(4,8,0) +-- See notes in Data.HashMap.Base +test_bottom :: a +test_bottom = error "Data.HashMap.alterF internal error: hit test_bottom" + +bogus# :: (# #) -> (# a #) +bogus# _ = error "Data.HashMap.alterF internal error: hit bogus#" + +impossibleAdjust :: a +impossibleAdjust = error "Data.HashMap.alterF internal error: impossible adjust" + +{-# RULES + +-- See detailed notes on alterF rules in Data.HashMap.Base. + +"alterFWeird" forall f. alterF f = + alterFWeird (f Nothing) (f (Just test_bottom)) f + +"alterFconstant" forall (f :: Maybe a -> Identity (Maybe a)) x. + alterFWeird x x f = \ !k !m -> + Identity (case runIdentity x of {Nothing -> delete k m; Just a -> insert k a m}) + +"alterFinsertWith" [1] forall (f :: Maybe a -> Identity (Maybe a)) x y. + alterFWeird (coerce (Just x)) (coerce (Just y)) f = + coerce (insertModifying x (\mold -> case runIdentity (f (Just mold)) of + Nothing -> bogus# (# #) + Just !new -> (# new #))) + +-- This rule is written a bit differently than the one for lazy +-- maps because the adjust here is strict. We could write it the +-- same general way anyway, but this seems simpler. +"alterFadjust" forall (f :: Maybe a -> Identity (Maybe a)) x. + alterFWeird (coerce Nothing) (coerce (Just x)) f = + coerce (adjust (\a -> case runIdentity (f (Just a)) of + Just a' -> a' + Nothing -> impossibleAdjust)) + +"alterFlookup" forall _ign1 _ign2 (f :: Maybe a -> Const r (Maybe a)) . + alterFWeird _ign1 _ign2 f = \ !k !m -> Const (getConst (f (lookup k m))) + #-} + +-- This is a very unsafe version of alterF used for RULES. When calling +-- alterFWeird x y f, the following *must* hold: +-- +-- x = f Nothing +-- y = f (Just _|_) +-- +-- Failure to abide by these laws will make demons come out of your nose. +alterFWeird + :: (Functor f, Eq k, Hashable k) + => f (Maybe v) + -> f (Maybe v) + -> (Maybe v -> f (Maybe v)) -> k -> HashMap k v -> f (HashMap k v) +alterFWeird _ _ f = alterFEager f +{-# INLINE [0] alterFWeird #-} + +-- | This is the default version of alterF that we use in most non-trivial +-- cases. It's called "eager" because it looks up the given key in the map +-- eagerly, whether or not the given function requires that information. +alterFEager :: (Functor f, Eq k, Hashable k) + => (Maybe v -> f (Maybe v)) -> k -> HashMap k v -> f (HashMap k v) +alterFEager f !k !m = (<$> f mv) $ \fres -> + case fres of + + ------------------------------ + -- Delete the key from the map. + Nothing -> case lookupRes of + + -- Key did not exist in the map to begin with, no-op + Absent -> m + + -- Key did exist, no collision + Present _ collPos -> deleteKeyExists collPos h k m + + ------------------------------ + -- Update value + Just v' -> case lookupRes of + + -- Key did not exist before, insert v' under a new key + Absent -> insertNewKey h k v' m + + -- Key existed before, no hash collision + Present v collPos -> v' `seq` + if v `ptrEq` v' + -- If the value is identical, no-op + then m + -- If the value changed, update the value. + else insertKeyExists collPos h k v' m + + where !h = hash k + !lookupRes = lookupRecordCollision h k m + !mv = case lookupRes of + Absent -> Nothing + Present v _ -> Just v +{-# INLINABLE alterFEager #-} +#endif + +------------------------------------------------------------------------ +-- * Combine + +-- | /O(n+m)/ The union of two maps. If a key occurs in both maps, +-- the provided function (first argument) will be used to compute the result. +unionWith :: (Eq k, Hashable k) => (v -> v -> v) -> HashMap k v -> HashMap k v + -> HashMap k v +unionWith f = unionWithKey (const f) +{-# INLINE unionWith #-} + +-- | /O(n+m)/ The union of two maps. If a key occurs in both maps, +-- the provided function (first argument) will be used to compute the result. +unionWithKey :: (Eq k, Hashable k) => (k -> v -> v -> v) -> HashMap k v -> HashMap k v + -> HashMap k v +unionWithKey f = go 0 + where + -- empty vs. anything + go !_ t1 Empty = t1 + go _ Empty t2 = t2 + -- leaf vs. leaf + go s t1@(Leaf h1 l1@(L k1 v1)) t2@(Leaf h2 l2@(L k2 v2)) + | h1 == h2 = if k1 == k2 + then leaf h1 k1 (f k1 v1 v2) + else collision h1 l1 l2 + | otherwise = goDifferentHash s h1 h2 t1 t2 + go s t1@(Leaf h1 (L k1 v1)) t2@(Collision h2 ls2) + | h1 == h2 = Collision h1 (updateOrSnocWithKey f k1 v1 ls2) + | otherwise = goDifferentHash s h1 h2 t1 t2 + go s t1@(Collision h1 ls1) t2@(Leaf h2 (L k2 v2)) + | h1 == h2 = Collision h1 (updateOrSnocWithKey (flip . f) k2 v2 ls1) + | otherwise = goDifferentHash s h1 h2 t1 t2 + go s t1@(Collision h1 ls1) t2@(Collision h2 ls2) + | h1 == h2 = Collision h1 (updateOrConcatWithKey f ls1 ls2) + | otherwise = goDifferentHash s h1 h2 t1 t2 + -- branch vs. branch + go s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = + let b' = b1 .|. b2 + ary' = unionArrayBy (go (s+bitsPerSubkey)) b1 b2 ary1 ary2 + in bitmapIndexedOrFull b' ary' + go s (BitmapIndexed b1 ary1) (Full ary2) = + let ary' = unionArrayBy (go (s+bitsPerSubkey)) b1 fullNodeMask ary1 ary2 + in Full ary' + go s (Full ary1) (BitmapIndexed b2 ary2) = + let ary' = unionArrayBy (go (s+bitsPerSubkey)) fullNodeMask b2 ary1 ary2 + in Full ary' + go s (Full ary1) (Full ary2) = + let ary' = unionArrayBy (go (s+bitsPerSubkey)) fullNodeMask fullNodeMask + ary1 ary2 + in Full ary' + -- leaf vs. branch + go s (BitmapIndexed b1 ary1) t2 + | b1 .&. m2 == 0 = let ary' = A.insert ary1 i t2 + b' = b1 .|. m2 + in bitmapIndexedOrFull b' ary' + | otherwise = let ary' = A.updateWith' ary1 i $ \st1 -> + go (s+bitsPerSubkey) st1 t2 + in BitmapIndexed b1 ary' + where + h2 = leafHashCode t2 + m2 = mask h2 s + i = sparseIndex b1 m2 + go s t1 (BitmapIndexed b2 ary2) + | b2 .&. m1 == 0 = let ary' = A.insert ary2 i $! t1 + b' = b2 .|. m1 + in bitmapIndexedOrFull b' ary' + | otherwise = let ary' = A.updateWith' ary2 i $ \st2 -> + go (s+bitsPerSubkey) t1 st2 + in BitmapIndexed b2 ary' + where + h1 = leafHashCode t1 + m1 = mask h1 s + i = sparseIndex b2 m1 + go s (Full ary1) t2 = + let h2 = leafHashCode t2 + i = index h2 s + ary' = update16With' ary1 i $ \st1 -> go (s+bitsPerSubkey) st1 t2 + in Full ary' + go s t1 (Full ary2) = + let h1 = leafHashCode t1 + i = index h1 s + ary' = update16With' ary2 i $ \st2 -> go (s+bitsPerSubkey) t1 st2 + in Full ary' + + leafHashCode (Leaf h _) = h + leafHashCode (Collision h _) = h + leafHashCode _ = error "leafHashCode" + + goDifferentHash s h1 h2 t1 t2 + | m1 == m2 = BitmapIndexed m1 (A.singleton $! go (s+bitsPerSubkey) t1 t2) + | m1 < m2 = BitmapIndexed (m1 .|. m2) (A.pair t1 t2) + | otherwise = BitmapIndexed (m1 .|. m2) (A.pair t2 t1) + where + m1 = mask h1 s + m2 = mask h2 s +{-# INLINE unionWithKey #-} + +------------------------------------------------------------------------ +-- * Transformations + +-- | /O(n)/ Transform this map by applying a function to every value. +mapWithKey :: (k -> v1 -> v2) -> HashMap k v1 -> HashMap k v2 +mapWithKey f = go + where + go Empty = Empty + go (Leaf h (L k v)) = leaf h k (f k v) + go (BitmapIndexed b ary) = BitmapIndexed b $ A.map' go ary + go (Full ary) = Full $ A.map' go ary + go (Collision h ary) = + Collision h $ A.map' (\ (L k v) -> let !v' = f k v in L k v') ary +{-# INLINE mapWithKey #-} + +-- | /O(n)/ Transform this map by applying a function to every value. +map :: (v1 -> v2) -> HashMap k v1 -> HashMap k v2 +map f = mapWithKey (const f) +{-# INLINE map #-} + + +------------------------------------------------------------------------ +-- * Filter + +-- | /O(n)/ Transform this map by applying a function to every value +-- and retaining only some of them. +mapMaybeWithKey :: (k -> v1 -> Maybe v2) -> HashMap k v1 -> HashMap k v2 +mapMaybeWithKey f = filterMapAux onLeaf onColl + where onLeaf (Leaf h (L k v)) | Just v' <- f k v = Just (leaf h k v') + onLeaf _ = Nothing + + onColl (L k v) | Just v' <- f k v = Just (L k v') + | otherwise = Nothing +{-# INLINE mapMaybeWithKey #-} + +-- | /O(n)/ Transform this map by applying a function to every value +-- and retaining only some of them. +mapMaybe :: (v1 -> Maybe v2) -> HashMap k v1 -> HashMap k v2 +mapMaybe f = mapMaybeWithKey (const f) +{-# INLINE mapMaybe #-} + +-- | /O(n)/ Perform an 'Applicative' action for each key-value pair +-- in a 'HashMap' and produce a 'HashMap' of all the results. Each 'HashMap' +-- will be strict in all its values. +-- +-- @ +-- traverseWithKey f = fmap ('map' id) . "Data.HashMap.Lazy".'Data.HashMap.Lazy.traverseWithKey' f +-- @ +-- +-- Note: the order in which the actions occur is unspecified. In particular, +-- when the map contains hash collisions, the order in which the actions +-- associated with the keys involved will depend in an unspecified way on +-- their insertion order. +traverseWithKey + :: Applicative f + => (k -> v1 -> f v2) + -> HashMap k v1 -> f (HashMap k v2) +traverseWithKey f = go + where + go Empty = pure Empty + go (Leaf h (L k v)) = leaf h k <$> f k v + go (BitmapIndexed b ary) = BitmapIndexed b <$> A.traverse' go ary + go (Full ary) = Full <$> A.traverse' go ary + go (Collision h ary) = + Collision h <$> A.traverse' (\ (L k v) -> (L k $!) <$> f k v) ary +{-# INLINE traverseWithKey #-} + +------------------------------------------------------------------------ +-- * Difference and intersection + +-- | /O(n*log m)/ Difference with a combining function. When two equal keys are +-- encountered, the combining function is applied to the values of these keys. +-- If it returns 'Nothing', the element is discarded (proper set difference). If +-- it returns (@'Just' y@), the element is updated with a new value @y@. +differenceWith :: (Eq k, Hashable k) => (v -> w -> Maybe v) -> HashMap k v -> HashMap k w -> HashMap k v +differenceWith f a b = foldlWithKey' go empty a + where + go m k v = case HM.lookup k b of + Nothing -> insert k v m + Just w -> maybe m (\y -> insert k y m) (f v w) +{-# INLINABLE differenceWith #-} + +-- | /O(n+m)/ Intersection of two maps. If a key occurs in both maps +-- the provided function is used to combine the values from the two +-- maps. +intersectionWith :: (Eq k, Hashable k) => (v1 -> v2 -> v3) -> HashMap k v1 + -> HashMap k v2 -> HashMap k v3 +intersectionWith f a b = foldlWithKey' go empty a + where + go m k v = case HM.lookup k b of + Just w -> insert k (f v w) m + _ -> m +{-# INLINABLE intersectionWith #-} + +-- | /O(n+m)/ Intersection of two maps. If a key occurs in both maps +-- the provided function is used to combine the values from the two +-- maps. +intersectionWithKey :: (Eq k, Hashable k) => (k -> v1 -> v2 -> v3) + -> HashMap k v1 -> HashMap k v2 -> HashMap k v3 +intersectionWithKey f a b = foldlWithKey' go empty a + where + go m k v = case HM.lookup k b of + Just w -> insert k (f k v w) m + _ -> m +{-# INLINABLE intersectionWithKey #-} + +------------------------------------------------------------------------ +-- ** Lists + +-- | /O(n*log n)/ Construct a map with the supplied mappings. If the +-- list contains duplicate mappings, the later mappings take +-- precedence. +fromList :: (Eq k, Hashable k) => [(k, v)] -> HashMap k v +fromList = L.foldl' (\ m (k, !v) -> HM.unsafeInsert k v m) empty +{-# INLINABLE fromList #-} + +-- | /O(n*log n)/ Construct a map from a list of elements. Uses +-- the provided function f to merge duplicate entries (f newVal oldVal). +-- +-- For example: +-- +-- > fromListWith (+) [ (x, 1) | x <- xs ] +-- +-- will create a map with number of occurrences of each element in xs. +-- +-- > fromListWith (++) [ (k, [v]) | (k, v) <- xs ] +-- +-- will group all values by their keys in a list 'xs :: [(k, v)]' and +-- return a 'HashMap k [v]'. +fromListWith :: (Eq k, Hashable k) => (v -> v -> v) -> [(k, v)] -> HashMap k v +fromListWith f = L.foldl' (\ m (k, v) -> unsafeInsertWith f k v m) empty +{-# INLINE fromListWith #-} + +------------------------------------------------------------------------ +-- Array operations + +updateWith :: Eq k => (v -> v) -> k -> A.Array (Leaf k v) -> A.Array (Leaf k v) +updateWith f k0 ary0 = go k0 ary0 0 (A.length ary0) + where + go !k !ary !i !n + | i >= n = ary + | otherwise = case A.index ary i of + (L kx y) | k == kx -> let !v' = f y in A.update ary i (L k v') + | otherwise -> go k ary (i+1) n +{-# INLINABLE updateWith #-} + +-- | Append the given key and value to the array. If the key is +-- already present, instead update the value of the key by applying +-- the given function to the new and old value (in that order). The +-- value is always evaluated to WHNF before being inserted into the +-- array. +updateOrSnocWith :: Eq k => (v -> v -> v) -> k -> v -> A.Array (Leaf k v) + -> A.Array (Leaf k v) +updateOrSnocWith f = updateOrSnocWithKey (const f) +{-# INLINABLE updateOrSnocWith #-} + +-- | Append the given key and value to the array. If the key is +-- already present, instead update the value of the key by applying +-- the given function to the new and old value (in that order). The +-- value is always evaluated to WHNF before being inserted into the +-- array. +updateOrSnocWithKey :: Eq k => (k -> v -> v -> v) -> k -> v -> A.Array (Leaf k v) + -> A.Array (Leaf k v) +updateOrSnocWithKey f k0 v0 ary0 = go k0 v0 ary0 0 (A.length ary0) + where + go !k v !ary !i !n + | i >= n = A.run $ do + -- Not found, append to the end. + mary <- A.new_ (n + 1) + A.copy ary 0 mary 0 n + let !l = v `seq` (L k v) + A.write mary n l + return mary + | otherwise = case A.index ary i of + (L kx y) | k == kx -> let !v' = f k v y in A.update ary i (L k v') + | otherwise -> go k v ary (i+1) n +{-# INLINABLE updateOrSnocWithKey #-} + +------------------------------------------------------------------------ +-- Smart constructors +-- +-- These constructors make sure the value is in WHNF before it's +-- inserted into the constructor. + +leaf :: Hash -> k -> v -> HashMap k v +leaf h k = \ !v -> Leaf h (L k v) +{-# INLINE leaf #-} diff --git a/Data/HashMap/Unsafe.hs b/Data/HashMap/Unsafe.hs new file mode 100644 index 0000000..382d5db --- /dev/null +++ b/Data/HashMap/Unsafe.hs @@ -0,0 +1,39 @@ +{-# LANGUAGE CPP #-} + +#if !MIN_VERSION_base(4,9,0) +{-# LANGUAGE MagicHash, Rank2Types, UnboxedTuples #-} +#endif + +-- | This module exports a workaround for this bug: +-- +-- http://hackage.haskell.org/trac/ghc/ticket/5916 +-- +-- Please read the comments in ghc/libraries/base/GHC/ST.lhs to +-- understand what's going on here. +-- +-- Code that uses this module should be compiled with -fno-full-laziness +module Data.HashMap.Unsafe + ( runST + ) where + +#if MIN_VERSION_base(4,9,0) +-- The GHC issue was fixed in GHC 8.0/base 4.9 +import Control.Monad.ST + +#else + +import GHC.Base (realWorld#) +import qualified GHC.ST as ST + +-- | Return the value computed by a state transformer computation. +-- The @forall@ ensures that the internal state used by the 'ST' +-- computation is inaccessible to the rest of the program. +runST :: (forall s. ST.ST s a) -> a +runST st = runSTRep (case st of { ST.ST st_rep -> st_rep }) +{-# INLINE runST #-} + +runSTRep :: (forall s. ST.STRep s a) -> a +runSTRep st_rep = case st_rep realWorld# of + (# _, r #) -> r +{-# INLINE [0] runSTRep #-} +#endif diff --git a/Data/HashMap/UnsafeShift.hs b/Data/HashMap/UnsafeShift.hs new file mode 100644 index 0000000..529ba50 --- /dev/null +++ b/Data/HashMap/UnsafeShift.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE MagicHash #-} + +module Data.HashMap.UnsafeShift + ( unsafeShiftL + , unsafeShiftR + ) where + +import GHC.Exts (Word(W#), Int(I#), uncheckedShiftL#, uncheckedShiftRL#) + +unsafeShiftL :: Word -> Int -> Word +unsafeShiftL (W# x#) (I# i#) = W# (x# `uncheckedShiftL#` i#) +{-# INLINE unsafeShiftL #-} + +unsafeShiftR :: Word -> Int -> Word +unsafeShiftR (W# x#) (I# i#) = W# (x# `uncheckedShiftRL#` i#) +{-# INLINE unsafeShiftR #-} diff --git a/Data/HashSet.hs b/Data/HashSet.hs new file mode 100644 index 0000000..a7dda9f --- /dev/null +++ b/Data/HashSet.hs @@ -0,0 +1,72 @@ +{-# LANGUAGE CPP #-} +#if __GLASGOW_HASKELL__ >= 702 +{-# LANGUAGE Safe #-} +#endif + +------------------------------------------------------------------------ +-- | +-- Module : Data.HashSet +-- Copyright : 2011 Bryan O'Sullivan +-- License : BSD-style +-- Maintainer : johan.tibell@gmail.com +-- Stability : provisional +-- Portability : portable +-- +-- A set of /hashable/ values. A set cannot contain duplicate items. +-- A 'HashSet' makes no guarantees as to the order of its elements. +-- +-- The implementation is based on /hash array mapped trie/. A +-- 'HashSet' is often faster than other tree-based set types, +-- especially when value comparison is expensive, as in the case of +-- strings. +-- +-- Many operations have a average-case complexity of /O(log n)/. The +-- implementation uses a large base (i.e. 16) so in practice these +-- operations are constant time. + +module Data.HashSet + ( + HashSet + + -- * Construction + , empty + , singleton + + -- * Combine + , union + , unions + + -- * Basic interface + , null + , size + , member + , insert + , delete + + -- * Transformations + , map + + -- * Difference and intersection + , difference + , intersection + + -- * Folds + , foldl' + , foldr + + -- * Filter + , filter + + -- * Conversions + + -- ** Lists + , toList + , fromList + + -- * HashMaps + , toMap + , fromMap + ) where + +import Data.HashSet.Base +import Prelude () diff --git a/Data/HashSet/Base.hs b/Data/HashSet/Base.hs new file mode 100644 index 0000000..0929420 --- /dev/null +++ b/Data/HashSet/Base.hs @@ -0,0 +1,327 @@ +{-# LANGUAGE CPP, DeriveDataTypeable #-} +#if __GLASGOW_HASKELL__ >= 708 +{-# LANGUAGE RoleAnnotations #-} +{-# LANGUAGE TypeFamilies #-} +#endif +#if __GLASGOW_HASKELL__ >= 702 +{-# LANGUAGE Trustworthy #-} +#endif + +------------------------------------------------------------------------ +-- | +-- Module : Data.HashSet.Base +-- Copyright : 2011 Bryan O'Sullivan +-- License : BSD-style +-- Maintainer : johan.tibell@gmail.com +-- Stability : provisional +-- Portability : portable +-- +-- A set of /hashable/ values. A set cannot contain duplicate items. +-- A 'HashSet' makes no guarantees as to the order of its elements. +-- +-- The implementation is based on /hash array mapped trie/. A +-- 'HashSet' is often faster than other tree-based set types, +-- especially when value comparison is expensive, as in the case of +-- strings. +-- +-- Many operations have a average-case complexity of /O(log n)/. The +-- implementation uses a large base (i.e. 16) so in practice these +-- operations are constant time. + +module Data.HashSet.Base + ( + HashSet + + -- * Construction + , empty + , singleton + + -- * Combine + , union + , unions + + -- * Basic interface + , null + , size + , member + , insert + , delete + + -- * Transformations + , map + + -- * Difference and intersection + , difference + , intersection + + -- * Folds + , foldl' + , foldr + + -- * Filter + , filter + + -- * Conversions + + -- ** Lists + , toList + , fromList + + -- * HashMaps + , toMap + , fromMap + + -- Exported from Data.HashMap.{Strict, Lazy} + , keysSet + ) where + +import Control.DeepSeq (NFData(..)) +import Data.Data hiding (Typeable) +import Data.HashMap.Base (HashMap, foldrWithKey, equalKeys, equalKeys1) +import Data.Hashable (Hashable(hashWithSalt)) +#if __GLASGOW_HASKELL__ >= 711 +import Data.Semigroup (Semigroup(..)) +#elif __GLASGOW_HASKELL__ < 709 +import Data.Monoid (Monoid(..)) +#endif +import GHC.Exts (build) +import Prelude hiding (filter, foldr, map, null) +import qualified Data.Foldable as Foldable +import qualified Data.HashMap.Base as H +import qualified Data.List as List +import Data.Typeable (Typeable) +import Text.Read + +#if __GLASGOW_HASKELL__ >= 708 +import qualified GHC.Exts as Exts +#endif + +#if MIN_VERSION_base(4,9,0) +import Data.Functor.Classes +#endif + +#if MIN_VERSION_hashable(1,2,5) +import qualified Data.Hashable.Lifted as H +#endif + +import Data.Functor ((<$)) + +-- | A set of values. A set cannot contain duplicate values. +newtype HashSet a = HashSet { + asMap :: HashMap a () + } deriving (Typeable) + +#if __GLASGOW_HASKELL__ >= 708 +type role HashSet nominal +#endif + +instance (NFData a) => NFData (HashSet a) where + rnf = rnf . asMap + {-# INLINE rnf #-} + +instance (Eq a) => Eq (HashSet a) where + HashSet a == HashSet b = equalKeys a b + {-# INLINE (==) #-} + +#if MIN_VERSION_base(4,9,0) +instance Eq1 HashSet where + liftEq eq (HashSet a) (HashSet b) = equalKeys1 eq a b +#endif + +instance (Ord a) => Ord (HashSet a) where + compare (HashSet a) (HashSet b) = compare a b + {-# INLINE compare #-} + +#if MIN_VERSION_base(4,9,0) +instance Ord1 HashSet where + liftCompare c (HashSet a) (HashSet b) = liftCompare2 c compare a b +#endif + +instance Foldable.Foldable HashSet where + foldr = Data.HashSet.Base.foldr + {-# INLINE foldr #-} + +#if __GLASGOW_HASKELL__ >= 711 +instance (Hashable a, Eq a) => Semigroup (HashSet a) where + (<>) = union + {-# INLINE (<>) #-} +#endif + +instance (Hashable a, Eq a) => Monoid (HashSet a) where + mempty = empty + {-# INLINE mempty #-} +#if __GLASGOW_HASKELL__ >= 711 + mappend = (<>) +#else + mappend = union +#endif + {-# INLINE mappend #-} + +instance (Eq a, Hashable a, Read a) => Read (HashSet a) where + readPrec = parens $ prec 10 $ do + Ident "fromList" <- lexP + xs <- readPrec + return (fromList xs) + + readListPrec = readListPrecDefault + +#if MIN_VERSION_base(4,9,0) +instance Show1 HashSet where + liftShowsPrec sp sl d m = + showsUnaryWith (liftShowsPrec sp sl) "fromList" d (toList m) +#endif + +instance (Show a) => Show (HashSet a) where + showsPrec d m = showParen (d > 10) $ + showString "fromList " . shows (toList m) + +instance (Data a, Eq a, Hashable a) => Data (HashSet a) where + gfoldl f z m = z fromList `f` toList m + toConstr _ = fromListConstr + gunfold k z c = case constrIndex c of + 1 -> k (z fromList) + _ -> error "gunfold" + dataTypeOf _ = hashSetDataType + dataCast1 f = gcast1 f + +#if MIN_VERSION_hashable(1,2,6) +instance H.Hashable1 HashSet where + liftHashWithSalt h s = H.liftHashWithSalt2 h hashWithSalt s . asMap +#endif + +instance (Hashable a) => Hashable (HashSet a) where + hashWithSalt salt = hashWithSalt salt . asMap + +fromListConstr :: Constr +fromListConstr = mkConstr hashSetDataType "fromList" [] Prefix + +hashSetDataType :: DataType +hashSetDataType = mkDataType "Data.HashSet.Base.HashSet" [fromListConstr] + +-- | /O(1)/ Construct an empty set. +empty :: HashSet a +empty = HashSet H.empty + +-- | /O(1)/ Construct a set with a single element. +singleton :: Hashable a => a -> HashSet a +singleton a = HashSet (H.singleton a ()) +{-# INLINABLE singleton #-} + +-- | /O(1)/ Convert to the equivalent 'HashMap'. +toMap :: HashSet a -> HashMap a () +toMap = asMap + +-- | /O(1)/ Convert from the equivalent 'HashMap'. +fromMap :: HashMap a () -> HashSet a +fromMap = HashSet + +-- | /O(n)/ Produce a 'HashSet' of all the keys in the given 'HashMap'. +-- +-- @since 0.2.10.0 +keysSet :: HashMap k a -> HashSet k +keysSet m = fromMap (() <$ m) + +-- | /O(n+m)/ Construct a set containing all elements from both sets. +-- +-- To obtain good performance, the smaller set must be presented as +-- the first argument. +union :: (Eq a, Hashable a) => HashSet a -> HashSet a -> HashSet a +union s1 s2 = HashSet $ H.union (asMap s1) (asMap s2) +{-# INLINE union #-} + +-- TODO: Figure out the time complexity of 'unions'. + +-- | Construct a set containing all elements from a list of sets. +unions :: (Eq a, Hashable a) => [HashSet a] -> HashSet a +unions = List.foldl' union empty +{-# INLINE unions #-} + +-- | /O(1)/ Return 'True' if this set is empty, 'False' otherwise. +null :: HashSet a -> Bool +null = H.null . asMap +{-# INLINE null #-} + +-- | /O(n)/ Return the number of elements in this set. +size :: HashSet a -> Int +size = H.size . asMap +{-# INLINE size #-} + +-- | /O(log n)/ Return 'True' if the given value is present in this +-- set, 'False' otherwise. +member :: (Eq a, Hashable a) => a -> HashSet a -> Bool +member a s = case H.lookup a (asMap s) of + Just _ -> True + _ -> False +{-# INLINABLE member #-} + +-- | /O(log n)/ Add the specified value to this set. +insert :: (Eq a, Hashable a) => a -> HashSet a -> HashSet a +insert a = HashSet . H.insert a () . asMap +{-# INLINABLE insert #-} + +-- | /O(log n)/ Remove the specified value from this set if +-- present. +delete :: (Eq a, Hashable a) => a -> HashSet a -> HashSet a +delete a = HashSet . H.delete a . asMap +{-# INLINABLE delete #-} + +-- | /O(n)/ Transform this set by applying a function to every value. +-- The resulting set may be smaller than the source. +map :: (Hashable b, Eq b) => (a -> b) -> HashSet a -> HashSet b +map f = fromList . List.map f . toList +{-# INLINE map #-} + +-- | /O(n)/ Difference of two sets. Return elements of the first set +-- not existing in the second. +difference :: (Eq a, Hashable a) => HashSet a -> HashSet a -> HashSet a +difference (HashSet a) (HashSet b) = HashSet (H.difference a b) +{-# INLINABLE difference #-} + +-- | /O(n)/ Intersection of two sets. Return elements present in both +-- the first set and the second. +intersection :: (Eq a, Hashable a) => HashSet a -> HashSet a -> HashSet a +intersection (HashSet a) (HashSet b) = HashSet (H.intersection a b) +{-# INLINABLE intersection #-} + +-- | /O(n)/ Reduce this set by applying a binary operator to all +-- elements, using the given starting value (typically the +-- left-identity of the operator). Each application of the operator +-- is evaluated before before using the result in the next +-- application. This function is strict in the starting value. +foldl' :: (a -> b -> a) -> a -> HashSet b -> a +foldl' f z0 = H.foldlWithKey' g z0 . asMap + where g z k _ = f z k +{-# INLINE foldl' #-} + +-- | /O(n)/ Reduce this set by applying a binary operator to all +-- elements, using the given starting value (typically the +-- right-identity of the operator). +foldr :: (b -> a -> a) -> a -> HashSet b -> a +foldr f z0 = foldrWithKey g z0 . asMap + where g k _ z = f k z +{-# INLINE foldr #-} + +-- | /O(n)/ Filter this set by retaining only elements satisfying a +-- predicate. +filter :: (a -> Bool) -> HashSet a -> HashSet a +filter p = HashSet . H.filterWithKey q . asMap + where q k _ = p k +{-# INLINE filter #-} + +-- | /O(n)/ Return a list of this set's elements. The list is +-- produced lazily. +toList :: HashSet a -> [a] +toList t = build (\ c z -> foldrWithKey ((const .) c) z (asMap t)) +{-# INLINE toList #-} + +-- | /O(n*min(W, n))/ Construct a set from a list of elements. +fromList :: (Eq a, Hashable a) => [a] -> HashSet a +fromList = HashSet . List.foldl' (\ m k -> H.insert k () m) H.empty +{-# INLINE fromList #-} + +#if __GLASGOW_HASKELL__ >= 708 +instance (Eq a, Hashable a) => Exts.IsList (HashSet a) where + type Item (HashSet a) = a + fromList = fromList + toList = toList +#endif diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5eb7e1b --- /dev/null +++ b/LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2010, Johan Tibell + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Johan Tibell nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/benchmarks/Benchmarks.hs b/benchmarks/Benchmarks.hs new file mode 100644 index 0000000..d5cf494 --- /dev/null +++ b/benchmarks/Benchmarks.hs @@ -0,0 +1,453 @@ +{-# LANGUAGE CPP, DeriveGeneric, GADTs, PackageImports, RecordWildCards #-} + +module Main where + +import Control.DeepSeq +import Control.DeepSeq.Generics (genericRnf) +import Criterion.Main (bench, bgroup, defaultMain, env, nf, whnf) +import Data.Bits ((.&.)) +import Data.Functor.Identity +import Data.Hashable (Hashable) +import qualified Data.ByteString as BS +import qualified "hashmap" Data.HashMap as IHM +import qualified Data.HashMap.Strict as HM +import qualified Data.IntMap as IM +import qualified Data.Map as M +import Data.List (foldl') +import Data.Maybe (fromMaybe) +import GHC.Generics (Generic) +import Prelude hiding (lookup) + +import qualified Util.ByteString as UBS +import qualified Util.Int as UI +import qualified Util.String as US + +#if !MIN_VERSION_bytestring(0,10,0) +instance NFData BS.ByteString +#endif + +data B where + B :: NFData a => a -> B + +instance NFData B where + rnf (B b) = rnf b + +-- TODO: This a stopgap measure to keep the benchmark work with +-- Criterion 1.0. +data Env = Env { + n :: !Int, + + elems :: ![(String, Int)], + keys :: ![String], + elemsBS :: ![(BS.ByteString, Int)], + keysBS :: ![BS.ByteString], + elemsI :: ![(Int, Int)], + keysI :: ![Int], + elemsI2 :: ![(Int, Int)], -- for union + + keys' :: ![String], + keysBS' :: ![BS.ByteString], + keysI' :: ![Int], + + keysDup :: ![String], + keysDupBS :: ![BS.ByteString], + keysDupI :: ![Int], + elemsDup :: ![(String, Int)], + elemsDupBS :: ![(BS.ByteString, Int)], + elemsDupI :: ![(Int, Int)], + + hm :: !(HM.HashMap String Int), + hmbs :: !(HM.HashMap BS.ByteString Int), + hmi :: !(HM.HashMap Int Int), + hmi2 :: !(HM.HashMap Int Int), + m :: !(M.Map String Int), + mbs :: !(M.Map BS.ByteString Int), + im :: !(IM.IntMap Int), + ihm :: !(IHM.Map String Int), + ihmbs :: !(IHM.Map BS.ByteString Int) + } deriving Generic + +instance NFData Env where rnf = genericRnf + +setupEnv :: IO Env +setupEnv = do + let n = 2^(12 :: Int) + + elems = zip keys [1..n] + keys = US.rnd 8 n + elemsBS = zip keysBS [1..n] + keysBS = UBS.rnd 8 n + elemsI = zip keysI [1..n] + keysI = UI.rnd (n+n) n + elemsI2 = zip [n `div` 2..n + (n `div` 2)] [1..n] -- for union + + keys' = US.rnd' 8 n + keysBS' = UBS.rnd' 8 n + keysI' = UI.rnd' (n+n) n + + keysDup = US.rnd 2 n + keysDupBS = UBS.rnd 2 n + keysDupI = UI.rnd (n`div`4) n + elemsDup = zip keysDup [1..n] + elemsDupBS = zip keysDupBS [1..n] + elemsDupI = zip keysDupI [1..n] + + hm = HM.fromList elems + hmbs = HM.fromList elemsBS + hmi = HM.fromList elemsI + hmi2 = HM.fromList elemsI2 + m = M.fromList elems + mbs = M.fromList elemsBS + im = IM.fromList elemsI + ihm = IHM.fromList elems + ihmbs = IHM.fromList elemsBS + return Env{..} + +main :: IO () +main = do + defaultMain + [ + env setupEnv $ \ ~(Env{..}) -> + -- * Comparison to other data structures + -- ** Map + bgroup "Map" + [ bgroup "lookup" + [ bench "String" $ whnf (lookupM keys) m + , bench "ByteString" $ whnf (lookupM keysBS) mbs + ] + , bgroup "lookup-miss" + [ bench "String" $ whnf (lookupM keys') m + , bench "ByteString" $ whnf (lookupM keysBS') mbs + ] + , bgroup "insert" + [ bench "String" $ whnf (insertM elems) M.empty + , bench "ByteStringString" $ whnf (insertM elemsBS) M.empty + ] + , bgroup "insert-dup" + [ bench "String" $ whnf (insertM elems) m + , bench "ByteStringString" $ whnf (insertM elemsBS) mbs + ] + , bgroup "delete" + [ bench "String" $ whnf (deleteM keys) m + , bench "ByteString" $ whnf (deleteM keysBS) mbs + ] + , bgroup "delete-miss" + [ bench "String" $ whnf (deleteM keys') m + , bench "ByteString" $ whnf (deleteM keysBS') mbs + ] + , bgroup "size" + [ bench "String" $ whnf M.size m + , bench "ByteString" $ whnf M.size mbs + ] + , bgroup "fromList" + [ bench "String" $ whnf M.fromList elems + , bench "ByteString" $ whnf M.fromList elemsBS + ] + ] + + -- ** Map from the hashmap package + , env setupEnv $ \ ~(Env{..}) -> + bgroup "hashmap/Map" + [ bgroup "lookup" + [ bench "String" $ whnf (lookupIHM keys) ihm + , bench "ByteString" $ whnf (lookupIHM keysBS) ihmbs + ] + , bgroup "lookup-miss" + [ bench "String" $ whnf (lookupIHM keys') ihm + , bench "ByteString" $ whnf (lookupIHM keysBS') ihmbs + ] + , bgroup "insert" + [ bench "String" $ whnf (insertIHM elems) IHM.empty + , bench "ByteStringString" $ whnf (insertIHM elemsBS) IHM.empty + ] + , bgroup "insert-dup" + [ bench "String" $ whnf (insertIHM elems) ihm + , bench "ByteStringString" $ whnf (insertIHM elemsBS) ihmbs + ] + , bgroup "delete" + [ bench "String" $ whnf (deleteIHM keys) ihm + , bench "ByteString" $ whnf (deleteIHM keysBS) ihmbs + ] + , bgroup "delete-miss" + [ bench "String" $ whnf (deleteIHM keys') ihm + , bench "ByteString" $ whnf (deleteIHM keysBS') ihmbs + ] + , bgroup "size" + [ bench "String" $ whnf IHM.size ihm + , bench "ByteString" $ whnf IHM.size ihmbs + ] + , bgroup "fromList" + [ bench "String" $ whnf IHM.fromList elems + , bench "ByteString" $ whnf IHM.fromList elemsBS + ] + ] + + -- ** IntMap + , env setupEnv $ \ ~(Env{..}) -> + bgroup "IntMap" + [ bench "lookup" $ whnf (lookupIM keysI) im + , bench "lookup-miss" $ whnf (lookupIM keysI') im + , bench "insert" $ whnf (insertIM elemsI) IM.empty + , bench "insert-dup" $ whnf (insertIM elemsI) im + , bench "delete" $ whnf (deleteIM keysI) im + , bench "delete-miss" $ whnf (deleteIM keysI') im + , bench "size" $ whnf IM.size im + , bench "fromList" $ whnf IM.fromList elemsI + ] + + , env setupEnv $ \ ~(Env{..}) -> + bgroup "HashMap" + [ -- * Basic interface + bgroup "lookup" + [ bench "String" $ whnf (lookup keys) hm + , bench "ByteString" $ whnf (lookup keysBS) hmbs + , bench "Int" $ whnf (lookup keysI) hmi + ] + , bgroup "lookup-miss" + [ bench "String" $ whnf (lookup keys') hm + , bench "ByteString" $ whnf (lookup keysBS') hmbs + , bench "Int" $ whnf (lookup keysI') hmi + ] + , bgroup "insert" + [ bench "String" $ whnf (insert elems) HM.empty + , bench "ByteString" $ whnf (insert elemsBS) HM.empty + , bench "Int" $ whnf (insert elemsI) HM.empty + ] + , bgroup "insert-dup" + [ bench "String" $ whnf (insert elems) hm + , bench "ByteString" $ whnf (insert elemsBS) hmbs + , bench "Int" $ whnf (insert elemsI) hmi + ] + , bgroup "delete" + [ bench "String" $ whnf (delete keys) hm + , bench "ByteString" $ whnf (delete keysBS) hmbs + , bench "Int" $ whnf (delete keysI) hmi + ] + , bgroup "delete-miss" + [ bench "String" $ whnf (delete keys') hm + , bench "ByteString" $ whnf (delete keysBS') hmbs + , bench "Int" $ whnf (delete keysI') hmi + ] + , bgroup "alterInsert" + [ bench "String" $ whnf (alterInsert elems) HM.empty + , bench "ByteString" $ whnf (alterInsert elemsBS) HM.empty + , bench "Int" $ whnf (alterInsert elemsI) HM.empty + ] + , bgroup "alterFInsert" + [ bench "String" $ whnf (alterFInsert elems) HM.empty + , bench "ByteString" $ whnf (alterFInsert elemsBS) HM.empty + , bench "Int" $ whnf (alterFInsert elemsI) HM.empty + ] + , bgroup "alterInsert-dup" + [ bench "String" $ whnf (alterInsert elems) hm + , bench "ByteString" $ whnf (alterInsert elemsBS) hmbs + , bench "Int" $ whnf (alterInsert elemsI) hmi + ] + , bgroup "alterFInsert-dup" + [ bench "String" $ whnf (alterFInsert elems) hm + , bench "ByteString" $ whnf (alterFInsert elemsBS) hmbs + , bench "Int" $ whnf (alterFInsert elemsI) hmi + ] + , bgroup "alterDelete" + [ bench "String" $ whnf (alterDelete keys) hm + , bench "ByteString" $ whnf (alterDelete keysBS) hmbs + , bench "Int" $ whnf (alterDelete keysI) hmi + ] + , bgroup "alterFDelete" + [ bench "String" $ whnf (alterFDelete keys) hm + , bench "ByteString" $ whnf (alterFDelete keysBS) hmbs + , bench "Int" $ whnf (alterFDelete keysI) hmi + ] + , bgroup "alterDelete-miss" + [ bench "String" $ whnf (alterDelete keys') hm + , bench "ByteString" $ whnf (alterDelete keysBS') hmbs + , bench "Int" $ whnf (alterDelete keysI') hmi + ] + , bgroup "alterFDelete-miss" + [ bench "String" $ whnf (alterFDelete keys') hm + , bench "ByteString" $ whnf (alterFDelete keysBS') hmbs + , bench "Int" $ whnf (alterFDelete keysI') hmi + ] + + -- Combine + , bench "union" $ whnf (HM.union hmi) hmi2 + + -- Transformations + , bench "map" $ whnf (HM.map (\ v -> v + 1)) hmi + + -- * Difference and intersection + , bench "difference" $ whnf (HM.difference hmi) hmi2 + , bench "intersection" $ whnf (HM.intersection hmi) hmi2 + + -- Folds + , bench "foldl'" $ whnf (HM.foldl' (+) 0) hmi + , bench "foldr" $ nf (HM.foldr (:) []) hmi + + -- Filter + , bench "filter" $ whnf (HM.filter (\ v -> v .&. 1 == 0)) hmi + , bench "filterWithKey" $ whnf (HM.filterWithKey (\ k _ -> k .&. 1 == 0)) hmi + + -- Size + , bgroup "size" + [ bench "String" $ whnf HM.size hm + , bench "ByteString" $ whnf HM.size hmbs + , bench "Int" $ whnf HM.size hmi + ] + + -- fromList + , bgroup "fromList" + [ bgroup "long" + [ bench "String" $ whnf HM.fromList elems + , bench "ByteString" $ whnf HM.fromList elemsBS + , bench "Int" $ whnf HM.fromList elemsI + ] + , bgroup "short" + [ bench "String" $ whnf HM.fromList elemsDup + , bench "ByteString" $ whnf HM.fromList elemsDupBS + , bench "Int" $ whnf HM.fromList elemsDupI + ] + ] + -- fromListWith + , bgroup "fromListWith" + [ bgroup "long" + [ bench "String" $ whnf (HM.fromListWith (+)) elems + , bench "ByteString" $ whnf (HM.fromListWith (+)) elemsBS + , bench "Int" $ whnf (HM.fromListWith (+)) elemsI + ] + , bgroup "short" + [ bench "String" $ whnf (HM.fromListWith (+)) elemsDup + , bench "ByteString" $ whnf (HM.fromListWith (+)) elemsDupBS + , bench "Int" $ whnf (HM.fromListWith (+)) elemsDupI + ] + ] + ] + ] + +------------------------------------------------------------------------ +-- * HashMap + +lookup :: (Eq k, Hashable k) => [k] -> HM.HashMap k Int -> Int +lookup xs m = foldl' (\z k -> fromMaybe z (HM.lookup k m)) 0 xs +{-# SPECIALIZE lookup :: [Int] -> HM.HashMap Int Int -> Int #-} +{-# SPECIALIZE lookup :: [String] -> HM.HashMap String Int -> Int #-} +{-# SPECIALIZE lookup :: [BS.ByteString] -> HM.HashMap BS.ByteString Int + -> Int #-} + +insert :: (Eq k, Hashable k) => [(k, Int)] -> HM.HashMap k Int + -> HM.HashMap k Int +insert xs m0 = foldl' (\m (k, v) -> HM.insert k v m) m0 xs +{-# SPECIALIZE insert :: [(Int, Int)] -> HM.HashMap Int Int + -> HM.HashMap Int Int #-} +{-# SPECIALIZE insert :: [(String, Int)] -> HM.HashMap String Int + -> HM.HashMap String Int #-} +{-# SPECIALIZE insert :: [(BS.ByteString, Int)] -> HM.HashMap BS.ByteString Int + -> HM.HashMap BS.ByteString Int #-} + +delete :: (Eq k, Hashable k) => [k] -> HM.HashMap k Int -> HM.HashMap k Int +delete xs m0 = foldl' (\m k -> HM.delete k m) m0 xs +{-# SPECIALIZE delete :: [Int] -> HM.HashMap Int Int -> HM.HashMap Int Int #-} +{-# SPECIALIZE delete :: [String] -> HM.HashMap String Int + -> HM.HashMap String Int #-} +{-# SPECIALIZE delete :: [BS.ByteString] -> HM.HashMap BS.ByteString Int + -> HM.HashMap BS.ByteString Int #-} + +alterInsert :: (Eq k, Hashable k) => [(k, Int)] -> HM.HashMap k Int + -> HM.HashMap k Int +alterInsert xs m0 = + foldl' (\m (k, v) -> HM.alter (const . Just $ v) k m) m0 xs +{-# SPECIALIZE alterInsert :: [(Int, Int)] -> HM.HashMap Int Int + -> HM.HashMap Int Int #-} +{-# SPECIALIZE alterInsert :: [(String, Int)] -> HM.HashMap String Int + -> HM.HashMap String Int #-} +{-# SPECIALIZE alterInsert :: [(BS.ByteString, Int)] -> HM.HashMap BS.ByteString Int + -> HM.HashMap BS.ByteString Int #-} + +alterDelete :: (Eq k, Hashable k) => [k] -> HM.HashMap k Int + -> HM.HashMap k Int +alterDelete xs m0 = + foldl' (\m k -> HM.alter (const Nothing) k m) m0 xs +{-# SPECIALIZE alterDelete :: [Int] -> HM.HashMap Int Int + -> HM.HashMap Int Int #-} +{-# SPECIALIZE alterDelete :: [String] -> HM.HashMap String Int + -> HM.HashMap String Int #-} +{-# SPECIALIZE alterDelete :: [BS.ByteString] -> HM.HashMap BS.ByteString Int + -> HM.HashMap BS.ByteString Int #-} + +alterFInsert :: (Eq k, Hashable k) => [(k, Int)] -> HM.HashMap k Int + -> HM.HashMap k Int +alterFInsert xs m0 = + foldl' (\m (k, v) -> runIdentity $ HM.alterF (const . Identity . Just $ v) k m) m0 xs +{-# SPECIALIZE alterFInsert :: [(Int, Int)] -> HM.HashMap Int Int + -> HM.HashMap Int Int #-} +{-# SPECIALIZE alterFInsert :: [(String, Int)] -> HM.HashMap String Int + -> HM.HashMap String Int #-} +{-# SPECIALIZE alterFInsert :: [(BS.ByteString, Int)] -> HM.HashMap BS.ByteString Int + -> HM.HashMap BS.ByteString Int #-} + +alterFDelete :: (Eq k, Hashable k) => [k] -> HM.HashMap k Int + -> HM.HashMap k Int +alterFDelete xs m0 = + foldl' (\m k -> runIdentity $ HM.alterF (const . Identity $ Nothing) k m) m0 xs +{-# SPECIALIZE alterFDelete :: [Int] -> HM.HashMap Int Int + -> HM.HashMap Int Int #-} +{-# SPECIALIZE alterFDelete :: [String] -> HM.HashMap String Int + -> HM.HashMap String Int #-} +{-# SPECIALIZE alterFDelete :: [BS.ByteString] -> HM.HashMap BS.ByteString Int + -> HM.HashMap BS.ByteString Int #-} + +------------------------------------------------------------------------ +-- * Map + +lookupM :: Ord k => [k] -> M.Map k Int -> Int +lookupM xs m = foldl' (\z k -> fromMaybe z (M.lookup k m)) 0 xs +{-# SPECIALIZE lookupM :: [String] -> M.Map String Int -> Int #-} +{-# SPECIALIZE lookupM :: [BS.ByteString] -> M.Map BS.ByteString Int -> Int #-} + +insertM :: Ord k => [(k, Int)] -> M.Map k Int -> M.Map k Int +insertM xs m0 = foldl' (\m (k, v) -> M.insert k v m) m0 xs +{-# SPECIALIZE insertM :: [(String, Int)] -> M.Map String Int + -> M.Map String Int #-} +{-# SPECIALIZE insertM :: [(BS.ByteString, Int)] -> M.Map BS.ByteString Int + -> M.Map BS.ByteString Int #-} + +deleteM :: Ord k => [k] -> M.Map k Int -> M.Map k Int +deleteM xs m0 = foldl' (\m k -> M.delete k m) m0 xs +{-# SPECIALIZE deleteM :: [String] -> M.Map String Int -> M.Map String Int #-} +{-# SPECIALIZE deleteM :: [BS.ByteString] -> M.Map BS.ByteString Int + -> M.Map BS.ByteString Int #-} + +------------------------------------------------------------------------ +-- * Map from the hashmap package + +lookupIHM :: (Eq k, Hashable k, Ord k) => [k] -> IHM.Map k Int -> Int +lookupIHM xs m = foldl' (\z k -> fromMaybe z (IHM.lookup k m)) 0 xs +{-# SPECIALIZE lookupIHM :: [String] -> IHM.Map String Int -> Int #-} +{-# SPECIALIZE lookupIHM :: [BS.ByteString] -> IHM.Map BS.ByteString Int + -> Int #-} + +insertIHM :: (Eq k, Hashable k, Ord k) => [(k, Int)] -> IHM.Map k Int + -> IHM.Map k Int +insertIHM xs m0 = foldl' (\m (k, v) -> IHM.insert k v m) m0 xs +{-# SPECIALIZE insertIHM :: [(String, Int)] -> IHM.Map String Int + -> IHM.Map String Int #-} +{-# SPECIALIZE insertIHM :: [(BS.ByteString, Int)] -> IHM.Map BS.ByteString Int + -> IHM.Map BS.ByteString Int #-} + +deleteIHM :: (Eq k, Hashable k, Ord k) => [k] -> IHM.Map k Int -> IHM.Map k Int +deleteIHM xs m0 = foldl' (\m k -> IHM.delete k m) m0 xs +{-# SPECIALIZE deleteIHM :: [String] -> IHM.Map String Int + -> IHM.Map String Int #-} +{-# SPECIALIZE deleteIHM :: [BS.ByteString] -> IHM.Map BS.ByteString Int + -> IHM.Map BS.ByteString Int #-} + +------------------------------------------------------------------------ +-- * IntMap + +lookupIM :: [Int] -> IM.IntMap Int -> Int +lookupIM xs m = foldl' (\z k -> fromMaybe z (IM.lookup k m)) 0 xs + +insertIM :: [(Int, Int)] -> IM.IntMap Int -> IM.IntMap Int +insertIM xs m0 = foldl' (\m (k, v) -> IM.insert k v m) m0 xs + +deleteIM :: [Int] -> IM.IntMap Int -> IM.IntMap Int +deleteIM xs m0 = foldl' (\m k -> IM.delete k m) m0 xs diff --git a/benchmarks/Util/ByteString.hs b/benchmarks/Util/ByteString.hs new file mode 100644 index 0000000..6359889 --- /dev/null +++ b/benchmarks/Util/ByteString.hs @@ -0,0 +1,29 @@ +-- | Benchmarking utilities. For example, functions for generating +-- random 'ByteString's. +module Util.ByteString where + +import qualified Data.ByteString as S +import qualified Data.ByteString.Char8 as C + +import Util.String as String + +-- | Generate a number of fixed length 'ByteString's where the content +-- of the strings are letters in ascending order. +asc :: Int -- ^ Length of each string + -> Int -- ^ Number of strings + -> [S.ByteString] +asc strlen num = map C.pack $ String.asc strlen num + +-- | Generate a number of fixed length 'ByteString's where the content +-- of the strings are letters in random order. +rnd :: Int -- ^ Length of each string + -> Int -- ^ Number of strings + -> [S.ByteString] +rnd strlen num = map C.pack $ String.rnd strlen num + +-- | Generate a number of fixed length 'ByteString's where the content +-- of the strings are letters in random order, different from @rnd@. +rnd' :: Int -- ^ Length of each string + -> Int -- ^ Number of strings + -> [S.ByteString] +rnd' strlen num = map C.pack $ String.rnd' strlen num diff --git a/benchmarks/Util/Int.hs b/benchmarks/Util/Int.hs new file mode 100644 index 0000000..0a44a14 --- /dev/null +++ b/benchmarks/Util/Int.hs @@ -0,0 +1,19 @@ +-- | Benchmarking utilities. For example, functions for generating +-- random integers. +module Util.Int where + +import System.Random (mkStdGen, randomRs) + +-- | Generate a number of uniform random integers in the interval +-- @[0..upper]@. +rnd :: Int -- ^ Upper bound (inclusive) + -> Int -- ^ Number of integers + -> [Int] +rnd upper num = take num $ randomRs (0, upper) $ mkStdGen 1234 + +-- | Generate a number of uniform random integers in the interval +-- @[0..upper]@ different from @rnd@. +rnd' :: Int -- ^ Upper bound (inclusive) + -> Int -- ^ Number of integers + -> [Int] +rnd' upper num = take num $ randomRs (0, upper) $ mkStdGen 5678 diff --git a/benchmarks/Util/String.hs b/benchmarks/Util/String.hs new file mode 100644 index 0000000..c649adf --- /dev/null +++ b/benchmarks/Util/String.hs @@ -0,0 +1,34 @@ +-- | Benchmarking utilities. For example, functions for generating +-- random strings. +module Util.String where + +import System.Random (mkStdGen, randomRs) + +-- | Generate a number of fixed length strings where the content of +-- the strings are letters in ascending order. +asc :: Int -- ^ Length of each string + -> Int -- ^ Number of strings + -> [String] +asc strlen num = take num $ iterate (snd . inc) $ replicate strlen 'a' + where inc [] = (True, []) + inc (c:cs) = case inc cs of (True, cs') | c == 'z' -> (True, 'a' : cs') + | otherwise -> (False, succ c : cs') + (False, cs') -> (False, c : cs') + +-- | Generate a number of fixed length strings where the content of +-- the strings are letters in random order. +rnd :: Int -- ^ Length of each string + -> Int -- ^ Number of strings + -> [String] +rnd strlen num = take num $ split $ randomRs ('a', 'z') $ mkStdGen 1234 + where + split cs = case splitAt strlen cs of (str, cs') -> str : split cs' + +-- | Generate a number of fixed length strings where the content of +-- the strings are letters in random order, different from rnd +rnd' :: Int -- ^ Length of each string + -> Int -- ^ Number of strings + -> [String] +rnd' strlen num = take num $ split $ randomRs ('a', 'z') $ mkStdGen 5678 + where + split cs = case splitAt strlen cs of (str, cs') -> str : split cs' diff --git a/tests/HashMapProperties.hs b/tests/HashMapProperties.hs new file mode 100644 index 0000000..ded3be6 --- /dev/null +++ b/tests/HashMapProperties.hs @@ -0,0 +1,463 @@ +{-# LANGUAGE CPP, GeneralizedNewtypeDeriving #-} + +-- | Tests for the 'Data.HashMap.Lazy' module. We test functions by +-- comparing them to a simpler model, an association list. + +module Main (main) where + +import Control.Monad ( guard ) +import qualified Data.Foldable as Foldable +import Data.Function (on) +import Data.Hashable (Hashable(hashWithSalt)) +import qualified Data.List as L +import Data.Ord (comparing) +#if defined(STRICT) +import qualified Data.HashMap.Strict as HM +import qualified Data.Map.Strict as M +#else +import qualified Data.HashMap.Lazy as HM +import qualified Data.Map.Lazy as M +#endif +import Test.QuickCheck (Arbitrary, Property, (==>), (===)) +import Test.Framework (Test, defaultMain, testGroup) +import Test.Framework.Providers.QuickCheck2 (testProperty) +#if MIN_VERSION_base(4,8,0) +import Data.Functor.Identity (Identity (..)) +#endif +import Control.Applicative (Const (..)) +import Test.QuickCheck.Function (Fun, apply) +import Test.QuickCheck.Poly (A, B) + +-- Key type that generates more hash collisions. +newtype Key = K { unK :: Int } + deriving (Arbitrary, Eq, Ord, Read, Show) + +instance Hashable Key where + hashWithSalt salt k = hashWithSalt salt (unK k) `mod` 20 + +------------------------------------------------------------------------ +-- * Properties + +------------------------------------------------------------------------ +-- ** Instances + +pEq :: [(Key, Int)] -> [(Key, Int)] -> Bool +pEq xs = (M.fromList xs ==) `eq` (HM.fromList xs ==) + +pNeq :: [(Key, Int)] -> [(Key, Int)] -> Bool +pNeq xs = (M.fromList xs /=) `eq` (HM.fromList xs /=) + +-- We cannot compare to `Data.Map` as ordering is different. +pOrd1 :: [(Key, Int)] -> Bool +pOrd1 xs = compare x x == EQ + where + x = HM.fromList xs + +pOrd2 :: [(Key, Int)] -> [(Key, Int)] -> [(Key, Int)] -> Bool +pOrd2 xs ys zs = case (compare x y, compare y z) of + (EQ, o) -> compare x z == o + (o, EQ) -> compare x z == o + (LT, LT) -> compare x z == LT + (GT, GT) -> compare x z == GT + (LT, GT) -> True -- ys greater than xs and zs. + (GT, LT) -> True + where + x = HM.fromList xs + y = HM.fromList ys + z = HM.fromList zs + +pOrd3 :: [(Key, Int)] -> [(Key, Int)] -> Bool +pOrd3 xs ys = case (compare x y, compare y x) of + (EQ, EQ) -> True + (LT, GT) -> True + (GT, LT) -> True + _ -> False + where + x = HM.fromList xs + y = HM.fromList ys + +pOrdEq :: [(Key, Int)] -> [(Key, Int)] -> Bool +pOrdEq xs ys = case (compare x y, x == y) of + (EQ, True) -> True + (LT, False) -> True + (GT, False) -> True + _ -> False + where + x = HM.fromList xs + y = HM.fromList ys + +pReadShow :: [(Key, Int)] -> Bool +pReadShow xs = M.fromList xs == read (show (M.fromList xs)) + +pFunctor :: [(Key, Int)] -> Bool +pFunctor = fmap (+ 1) `eq_` fmap (+ 1) + +pFoldable :: [(Int, Int)] -> Bool +pFoldable = (L.sort . Foldable.foldr (:) []) `eq` + (L.sort . Foldable.foldr (:) []) + +pHashable :: [(Key, Int)] -> [Int] -> Int -> Property +pHashable xs is salt = + x == y ==> hashWithSalt salt x === hashWithSalt salt y + where + xs' = L.nubBy (\(k,_) (k',_) -> k == k') xs + ys = shuffle is xs' + x = HM.fromList xs' + y = HM.fromList ys + -- Shuffle the list using indexes in the second + shuffle :: [Int] -> [a] -> [a] + shuffle idxs = L.map snd + . L.sortBy (comparing fst) + . L.zip (idxs ++ [L.maximum (0:is) + 1 ..]) + +------------------------------------------------------------------------ +-- ** Basic interface + +pSize :: [(Key, Int)] -> Bool +pSize = M.size `eq` HM.size + +pMember :: Key -> [(Key, Int)] -> Bool +pMember k = M.member k `eq` HM.member k + +pLookup :: Key -> [(Key, Int)] -> Bool +pLookup k = M.lookup k `eq` HM.lookup k + +pInsert :: Key -> Int -> [(Key, Int)] -> Bool +pInsert k v = M.insert k v `eq_` HM.insert k v + +pDelete :: Key -> [(Key, Int)] -> Bool +pDelete k = M.delete k `eq_` HM.delete k + +newtype AlwaysCollide = AC Int + deriving (Arbitrary, Eq, Ord, Show) + +instance Hashable AlwaysCollide where + hashWithSalt _ _ = 1 + +-- White-box test that tests the case of deleting one of two keys from +-- a map, where the keys' hash values collide. +pDeleteCollision :: AlwaysCollide -> AlwaysCollide -> AlwaysCollide -> Int + -> Property +pDeleteCollision k1 k2 k3 idx = (k1 /= k2) && (k2 /= k3) && (k1 /= k3) ==> + HM.member toKeep $ HM.delete toDelete $ + HM.fromList [(k1, 1 :: Int), (k2, 2), (k3, 3)] + where + which = idx `mod` 3 + toDelete + | which == 0 = k1 + | which == 1 = k2 + | which == 2 = k3 + | otherwise = error "Impossible" + toKeep + | which == 0 = k2 + | which == 1 = k3 + | which == 2 = k1 + | otherwise = error "Impossible" + +pInsertWith :: Key -> [(Key, Int)] -> Bool +pInsertWith k = M.insertWith (+) k 1 `eq_` HM.insertWith (+) k 1 + +pAdjust :: Key -> [(Key, Int)] -> Bool +pAdjust k = M.adjust succ k `eq_` HM.adjust succ k + +pUpdateAdjust :: Key -> [(Key, Int)] -> Bool +pUpdateAdjust k = M.update (Just . succ) k `eq_` HM.update (Just . succ) k + +pUpdateDelete :: Key -> [(Key, Int)] -> Bool +pUpdateDelete k = M.update (const Nothing) k `eq_` HM.update (const Nothing) k + +pAlterAdjust :: Key -> [(Key, Int)] -> Bool +pAlterAdjust k = M.alter (fmap succ) k `eq_` HM.alter (fmap succ) k + +pAlterInsert :: Key -> [(Key, Int)] -> Bool +pAlterInsert k = M.alter (const $ Just 3) k `eq_` HM.alter (const $ Just 3) k + +pAlterDelete :: Key -> [(Key, Int)] -> Bool +pAlterDelete k = M.alter (const Nothing) k `eq_` HM.alter (const Nothing) k + + +-- We choose the list functor here because we don't fuss with +-- it in alterF rules and because it has a sufficiently interesting +-- structure to have a good chance of breaking if something is wrong. +pAlterF :: Key -> Fun (Maybe A) [Maybe A] -> [(Key, A)] -> Property +pAlterF k f xs = + fmap M.toAscList (M.alterF (apply f) k (M.fromList xs)) + === + fmap toAscList (HM.alterF (apply f) k (HM.fromList xs)) + +#if !MIN_VERSION_base(4,8,0) +newtype Identity a = Identity {runIdentity :: a} +instance Functor Identity where + fmap f (Identity x) = Identity (f x) +#endif + +pAlterFAdjust :: Key -> [(Key, Int)] -> Bool +pAlterFAdjust k = + runIdentity . M.alterF (Identity . fmap succ) k `eq_` + runIdentity . HM.alterF (Identity . fmap succ) k + +pAlterFInsert :: Key -> [(Key, Int)] -> Bool +pAlterFInsert k = + runIdentity . M.alterF (const . Identity . Just $ 3) k `eq_` + runIdentity . HM.alterF (const . Identity . Just $ 3) k + +pAlterFInsertWith :: Key -> Fun Int Int -> [(Key, Int)] -> Bool +pAlterFInsertWith k f = + runIdentity . M.alterF (Identity . Just . maybe 3 (apply f)) k `eq_` + runIdentity . HM.alterF (Identity . Just . maybe 3 (apply f)) k + +pAlterFDelete :: Key -> [(Key, Int)] -> Bool +pAlterFDelete k = + runIdentity . M.alterF (const (Identity Nothing)) k `eq_` + runIdentity . HM.alterF (const (Identity Nothing)) k + +pAlterFLookup :: Key + -> Fun (Maybe A) B + -> [(Key, A)] -> Bool +pAlterFLookup k f = + getConst . M.alterF (Const . apply f :: Maybe A -> Const B (Maybe A)) k + `eq` + getConst . HM.alterF (Const . apply f) k + +------------------------------------------------------------------------ +-- ** Combine + +pUnion :: [(Key, Int)] -> [(Key, Int)] -> Bool +pUnion xs ys = M.union (M.fromList xs) `eq_` HM.union (HM.fromList xs) $ ys + +pUnionWith :: [(Key, Int)] -> [(Key, Int)] -> Bool +pUnionWith xs ys = M.unionWith (-) (M.fromList xs) `eq_` + HM.unionWith (-) (HM.fromList xs) $ ys + +pUnionWithKey :: [(Key, Int)] -> [(Key, Int)] -> Bool +pUnionWithKey xs ys = M.unionWithKey go (M.fromList xs) `eq_` + HM.unionWithKey go (HM.fromList xs) $ ys + where + go :: Key -> Int -> Int -> Int + go (K k) i1 i2 = k - i1 + i2 + +pUnions :: [[(Key, Int)]] -> Bool +pUnions xss = M.toAscList (M.unions (map M.fromList xss)) == + toAscList (HM.unions (map HM.fromList xss)) + +------------------------------------------------------------------------ +-- ** Transformations + +pMap :: [(Key, Int)] -> Bool +pMap = M.map (+ 1) `eq_` HM.map (+ 1) + +pTraverse :: [(Key, Int)] -> Bool +pTraverse xs = + L.sort (fmap (L.sort . M.toList) (M.traverseWithKey (\_ v -> [v + 1, v + 2]) (M.fromList (take 10 xs)))) + == L.sort (fmap (L.sort . HM.toList) (HM.traverseWithKey (\_ v -> [v + 1, v + 2]) (HM.fromList (take 10 xs)))) + +------------------------------------------------------------------------ +-- ** Difference and intersection + +pDifference :: [(Key, Int)] -> [(Key, Int)] -> Bool +pDifference xs ys = M.difference (M.fromList xs) `eq_` + HM.difference (HM.fromList xs) $ ys + +pDifferenceWith :: [(Key, Int)] -> [(Key, Int)] -> Bool +pDifferenceWith xs ys = M.differenceWith f (M.fromList xs) `eq_` + HM.differenceWith f (HM.fromList xs) $ ys + where + f x y = if x == 0 then Nothing else Just (x - y) + +pIntersection :: [(Key, Int)] -> [(Key, Int)] -> Bool +pIntersection xs ys = M.intersection (M.fromList xs) `eq_` + HM.intersection (HM.fromList xs) $ ys + +pIntersectionWith :: [(Key, Int)] -> [(Key, Int)] -> Bool +pIntersectionWith xs ys = M.intersectionWith (-) (M.fromList xs) `eq_` + HM.intersectionWith (-) (HM.fromList xs) $ ys + +pIntersectionWithKey :: [(Key, Int)] -> [(Key, Int)] -> Bool +pIntersectionWithKey xs ys = M.intersectionWithKey go (M.fromList xs) `eq_` + HM.intersectionWithKey go (HM.fromList xs) $ ys + where + go :: Key -> Int -> Int -> Int + go (K k) i1 i2 = k - i1 - i2 + +------------------------------------------------------------------------ +-- ** Folds + +pFoldr :: [(Int, Int)] -> Bool +pFoldr = (L.sort . M.foldr (:) []) `eq` (L.sort . HM.foldr (:) []) + +pFoldrWithKey :: [(Int, Int)] -> Bool +pFoldrWithKey = (sortByKey . M.foldrWithKey f []) `eq` + (sortByKey . HM.foldrWithKey f []) + where f k v z = (k, v) : z + +pFoldl' :: Int -> [(Int, Int)] -> Bool +pFoldl' z0 = foldlWithKey'Map (\ z _ v -> v + z) z0 `eq` HM.foldl' (+) z0 + +foldlWithKey'Map :: (b -> k -> a -> b) -> b -> M.Map k a -> b +#if MIN_VERSION_containers(4,2,0) +foldlWithKey'Map = M.foldlWithKey' +#else +-- Equivalent except for bottoms, which we don't test. +foldlWithKey'Map = M.foldlWithKey +#endif + +------------------------------------------------------------------------ +-- ** Filter + +pMapMaybeWithKey :: [(Key, Int)] -> Bool +pMapMaybeWithKey = M.mapMaybeWithKey f `eq_` HM.mapMaybeWithKey f + where f k v = guard (odd (unK k + v)) >> Just (v + 1) + +pMapMaybe :: [(Key, Int)] -> Bool +pMapMaybe = M.mapMaybe f `eq_` HM.mapMaybe f + where f v = guard (odd v) >> Just (v + 1) + +pFilter :: [(Key, Int)] -> Bool +pFilter = M.filter odd `eq_` HM.filter odd + +pFilterWithKey :: [(Key, Int)] -> Bool +pFilterWithKey = M.filterWithKey p `eq_` HM.filterWithKey p + where p k v = odd (unK k + v) + +------------------------------------------------------------------------ +-- ** Conversions + +-- 'eq_' already calls fromList. +pFromList :: [(Key, Int)] -> Bool +pFromList = id `eq_` id + +pFromListWith :: [(Key, Int)] -> Bool +pFromListWith kvs = (M.toAscList $ M.fromListWith (+) kvs) == + (toAscList $ HM.fromListWith (+) kvs) + +pToList :: [(Key, Int)] -> Bool +pToList = M.toAscList `eq` toAscList + +pElems :: [(Key, Int)] -> Bool +pElems = (L.sort . M.elems) `eq` (L.sort . HM.elems) + +pKeys :: [(Key, Int)] -> Bool +pKeys = (L.sort . M.keys) `eq` (L.sort . HM.keys) + +------------------------------------------------------------------------ +-- * Test list + +tests :: [Test] +tests = + [ + -- Instances + testGroup "instances" + [ testProperty "==" pEq + , testProperty "/=" pNeq + , testProperty "compare reflexive" pOrd1 + , testProperty "compare transitive" pOrd2 + , testProperty "compare antisymmetric" pOrd3 + , testProperty "Ord => Eq" pOrdEq + , testProperty "Read/Show" pReadShow + , testProperty "Functor" pFunctor + , testProperty "Foldable" pFoldable + , testProperty "Hashable" pHashable + ] + -- Basic interface + , testGroup "basic interface" + [ testProperty "size" pSize + , testProperty "member" pMember + , testProperty "lookup" pLookup + , testProperty "insert" pInsert + , testProperty "delete" pDelete + , testProperty "deleteCollision" pDeleteCollision + , testProperty "insertWith" pInsertWith + , testProperty "adjust" pAdjust + , testProperty "updateAdjust" pUpdateAdjust + , testProperty "updateDelete" pUpdateDelete + , testProperty "alterAdjust" pAlterAdjust + , testProperty "alterInsert" pAlterInsert + , testProperty "alterDelete" pAlterDelete + , testProperty "alterF" pAlterF + , testProperty "alterFAdjust" pAlterFAdjust + , testProperty "alterFInsert" pAlterFInsert + , testProperty "alterFInsertWith" pAlterFInsertWith + , testProperty "alterFDelete" pAlterFDelete + , testProperty "alterFLookup" pAlterFLookup + ] + -- Combine + , testProperty "union" pUnion + , testProperty "unionWith" pUnionWith + , testProperty "unionWithKey" pUnionWithKey + , testProperty "unions" pUnions + -- Transformations + , testProperty "map" pMap + , testProperty "traverse" pTraverse + -- Folds + , testGroup "folds" + [ testProperty "foldr" pFoldr + , testProperty "foldrWithKey" pFoldrWithKey + , testProperty "foldl'" pFoldl' + ] + , testGroup "difference and intersection" + [ testProperty "difference" pDifference + , testProperty "differenceWith" pDifferenceWith + , testProperty "intersection" pIntersection + , testProperty "intersectionWith" pIntersectionWith + , testProperty "intersectionWithKey" pIntersectionWithKey + ] + -- Filter + , testGroup "filter" + [ testProperty "filter" pFilter + , testProperty "filterWithKey" pFilterWithKey + , testProperty "mapMaybe" pMapMaybe + , testProperty "mapMaybeWithKey" pMapMaybeWithKey + ] + -- Conversions + , testGroup "conversions" + [ testProperty "elems" pElems + , testProperty "keys" pKeys + , testProperty "fromList" pFromList + , testProperty "fromListWith" pFromListWith + , testProperty "toList" pToList + ] + ] + +------------------------------------------------------------------------ +-- * Model + +type Model k v = M.Map k v + +-- | Check that a function operating on a 'HashMap' is equivalent to +-- one operating on a 'Model'. +eq :: (Eq a, Eq k, Hashable k, Ord k) + => (Model k v -> a) -- ^ Function that modifies a 'Model' + -> (HM.HashMap k v -> a) -- ^ Function that modified a 'HashMap' in the same + -- way + -> [(k, v)] -- ^ Initial content of the 'HashMap' and 'Model' + -> Bool -- ^ True if the functions are equivalent +eq f g xs = g (HM.fromList xs) == f (M.fromList xs) + +infix 4 `eq` + +eq_ :: (Eq k, Eq v, Hashable k, Ord k) + => (Model k v -> Model k v) -- ^ Function that modifies a 'Model' + -> (HM.HashMap k v -> HM.HashMap k v) -- ^ Function that modified a + -- 'HashMap' in the same way + -> [(k, v)] -- ^ Initial content of the 'HashMap' + -- and 'Model' + -> Bool -- ^ True if the functions are + -- equivalent +eq_ f g = (M.toAscList . f) `eq` (toAscList . g) + +infix 4 `eq_` + +------------------------------------------------------------------------ +-- * Test harness + +main :: IO () +main = defaultMain tests + +------------------------------------------------------------------------ +-- * Helpers + +sortByKey :: Ord k => [(k, v)] -> [(k, v)] +sortByKey = L.sortBy (compare `on` fst) + +toAscList :: Ord k => HM.HashMap k v -> [(k, v)] +toAscList = L.sortBy (compare `on` fst) . HM.toList diff --git a/tests/HashSetProperties.hs b/tests/HashSetProperties.hs new file mode 100644 index 0000000..130c1dc --- /dev/null +++ b/tests/HashSetProperties.hs @@ -0,0 +1,248 @@ +{-# LANGUAGE CPP, GeneralizedNewtypeDeriving #-} + +-- | Tests for the 'Data.HashSet' module. We test functions by +-- comparing them to a simpler model, a list. + +module Main (main) where + +import qualified Data.Foldable as Foldable +import Data.Hashable (Hashable(hashWithSalt)) +import qualified Data.List as L +import qualified Data.HashSet as S +import qualified Data.Set as Set +import Data.Ord (comparing) +import Test.QuickCheck (Arbitrary, Property, (==>), (===)) +import Test.Framework (Test, defaultMain, testGroup) +import Test.Framework.Providers.QuickCheck2 (testProperty) + +-- Key type that generates more hash collisions. +newtype Key = K { unK :: Int } + deriving (Arbitrary, Enum, Eq, Integral, Num, Ord, Read, Show, Real) + +instance Hashable Key where + hashWithSalt salt k = hashWithSalt salt (unK k) `mod` 20 + +------------------------------------------------------------------------ +-- * Properties + +------------------------------------------------------------------------ +-- ** Instances + +pEq :: [Key] -> [Key] -> Bool +pEq xs = (Set.fromList xs ==) `eq` (S.fromList xs ==) + +pNeq :: [Key] -> [Key] -> Bool +pNeq xs = (Set.fromList xs /=) `eq` (S.fromList xs /=) + +-- We cannot compare to `Data.Map` as ordering is different. +pOrd1 :: [Key] -> Bool +pOrd1 xs = compare x x == EQ + where + x = S.fromList xs + +pOrd2 :: [Key] -> [Key] -> [Key] -> Bool +pOrd2 xs ys zs = case (compare x y, compare y z) of + (EQ, o) -> compare x z == o + (o, EQ) -> compare x z == o + (LT, LT) -> compare x z == LT + (GT, GT) -> compare x z == GT + (LT, GT) -> True -- ys greater than xs and zs. + (GT, LT) -> True + where + x = S.fromList xs + y = S.fromList ys + z = S.fromList zs + +pOrd3 :: [Key] -> [Key] -> Bool +pOrd3 xs ys = case (compare x y, compare y x) of + (EQ, EQ) -> True + (LT, GT) -> True + (GT, LT) -> True + _ -> False + where + x = S.fromList xs + y = S.fromList ys + +pOrdEq :: [Key] -> [Key] -> Bool +pOrdEq xs ys = case (compare x y, x == y) of + (EQ, True) -> True + (LT, False) -> True + (GT, False) -> True + _ -> False + where + x = S.fromList xs + y = S.fromList ys + +pReadShow :: [Key] -> Bool +pReadShow xs = Set.fromList xs == read (show (Set.fromList xs)) + +pFoldable :: [Int] -> Bool +pFoldable = (L.sort . Foldable.foldr (:) []) `eq` + (L.sort . Foldable.foldr (:) []) + +pPermutationEq :: [Key] -> [Int] -> Bool +pPermutationEq xs is = S.fromList xs == S.fromList ys + where + ys = shuffle is xs + shuffle idxs = L.map snd + . L.sortBy (comparing fst) + . L.zip (idxs ++ [L.maximum (0:is) + 1 ..]) + +pHashable :: [Key] -> [Int] -> Int -> Property +pHashable xs is salt = + x == y ==> hashWithSalt salt x === hashWithSalt salt y + where + xs' = L.nub xs + ys = shuffle is xs' + x = S.fromList xs' + y = S.fromList ys + shuffle idxs = L.map snd + . L.sortBy (comparing fst) + . L.zip (idxs ++ [L.maximum (0:is) + 1 ..]) + +------------------------------------------------------------------------ +-- ** Basic interface + +pSize :: [Key] -> Bool +pSize = Set.size `eq` S.size + +pMember :: Key -> [Key] -> Bool +pMember k = Set.member k `eq` S.member k + +pInsert :: Key -> [Key] -> Bool +pInsert a = Set.insert a `eq_` S.insert a + +pDelete :: Key -> [Key] -> Bool +pDelete a = Set.delete a `eq_` S.delete a + +------------------------------------------------------------------------ +-- ** Combine + +pUnion :: [Key] -> [Key] -> Bool +pUnion xs ys = Set.union (Set.fromList xs) `eq_` + S.union (S.fromList xs) $ ys + +------------------------------------------------------------------------ +-- ** Transformations + +pMap :: [Key] -> Bool +pMap = Set.map (+ 1) `eq_` S.map (+ 1) + +------------------------------------------------------------------------ +-- ** Folds + +pFoldr :: [Int] -> Bool +pFoldr = (L.sort . foldrSet (:) []) `eq` + (L.sort . S.foldr (:) []) + +foldrSet :: (a -> b -> b) -> b -> Set.Set a -> b +#if MIN_VERSION_containers(0,4,2) +foldrSet = Set.foldr +#else +foldrSet = Foldable.foldr +#endif + +pFoldl' :: Int -> [Int] -> Bool +pFoldl' z0 = foldl'Set (+) z0 `eq` S.foldl' (+) z0 + +foldl'Set :: (a -> b -> a) -> a -> Set.Set b -> a +#if MIN_VERSION_containers(0,4,2) +foldl'Set = Set.foldl' +#else +foldl'Set = Foldable.foldl' +#endif + +------------------------------------------------------------------------ +-- ** Filter + +pFilter :: [Key] -> Bool +pFilter = Set.filter odd `eq_` S.filter odd + +------------------------------------------------------------------------ +-- ** Conversions + +pToList :: [Key] -> Bool +pToList = Set.toAscList `eq` toAscList + +------------------------------------------------------------------------ +-- * Test list + +tests :: [Test] +tests = + [ + -- Instances + testGroup "instances" + [ testProperty "==" pEq + , testProperty "Permutation ==" pPermutationEq + , testProperty "/=" pNeq + , testProperty "compare reflexive" pOrd1 + , testProperty "compare transitive" pOrd2 + , testProperty "compare antisymmetric" pOrd3 + , testProperty "Ord => Eq" pOrdEq + , testProperty "Read/Show" pReadShow + , testProperty "Foldable" pFoldable + , testProperty "Hashable" pHashable + ] + -- Basic interface + , testGroup "basic interface" + [ testProperty "size" pSize + , testProperty "member" pMember + , testProperty "insert" pInsert + , testProperty "delete" pDelete + ] + -- Combine + , testProperty "union" pUnion + -- Transformations + , testProperty "map" pMap + -- Folds + , testGroup "folds" + [ testProperty "foldr" pFoldr + , testProperty "foldl'" pFoldl' + ] + -- Filter + , testGroup "filter" + [ testProperty "filter" pFilter + ] + -- Conversions + , testGroup "conversions" + [ testProperty "toList" pToList + ] + ] + +------------------------------------------------------------------------ +-- * Model + +-- Invariant: the list is sorted in ascending order, by key. +type Model a = Set.Set a + +-- | Check that a function operating on a 'HashMap' is equivalent to +-- one operating on a 'Model'. +eq :: (Eq a, Hashable a, Ord a, Eq b) + => (Model a -> b) -- ^ Function that modifies a 'Model' in the same + -- way + -> (S.HashSet a -> b) -- ^ Function that modified a 'HashSet' + -> [a] -- ^ Initial content of the 'HashSet' and 'Model' + -> Bool -- ^ True if the functions are equivalent +eq f g xs = g (S.fromList xs) == f (Set.fromList xs) + +eq_ :: (Eq a, Hashable a, Ord a) + => (Model a -> Model a) -- ^ Function that modifies a 'Model' + -> (S.HashSet a -> S.HashSet a) -- ^ Function that modified a + -- 'HashSet' in the same way + -> [a] -- ^ Initial content of the 'HashSet' + -- and 'Model' + -> Bool -- ^ True if the functions are + -- equivalent +eq_ f g = (Set.toAscList . f) `eq` (toAscList . g) + +------------------------------------------------------------------------ +-- * Test harness + +main :: IO () +main = defaultMain tests + +------------------------------------------------------------------------ +-- * Helpers + +toAscList :: Ord a => S.HashSet a -> [a] +toAscList = L.sort . S.toList diff --git a/tests/List.hs b/tests/List.hs new file mode 100644 index 0000000..2bf8e0b --- /dev/null +++ b/tests/List.hs @@ -0,0 +1,68 @@ +module Main (main) where + +import Data.HashMap.List +import Data.List (nub, sort, sortBy) +import Data.Ord (comparing) + +import Test.Framework (Test, defaultMain, testGroup) +import Test.Framework.Providers.QuickCheck2 (testProperty) +import Test.QuickCheck ((==>), (===), property, Property) + +tests :: Test +tests = testGroup "Data.HashMap.List" + [ testProperty "isPermutationBy" pIsPermutation + , testProperty "isPermutationBy of different length" pIsPermutationDiffLength + , testProperty "pUnorderedCompare" pUnorderedCompare + , testGroup "modelUnorderedCompare" + [ testProperty "reflexive" modelUnorderedCompareRefl + , testProperty "anti-symmetric" modelUnorderedCompareAntiSymm + , testProperty "transitive" modelUnorderedCompareTrans + ] + ] + +pIsPermutation :: [Char] -> [Int] -> Bool +pIsPermutation xs is = isPermutationBy (==) xs xs' + where + is' = nub is ++ [maximum (0:is) + 1 ..] + xs' = map fst . sortBy (comparing snd) $ zip xs is' + +pIsPermutationDiffLength :: [Int] -> [Int] -> Property +pIsPermutationDiffLength xs ys = + length xs /= length ys ==> isPermutationBy (==) xs ys === False + +-- | Homogenous version of 'unorderedCompare' +-- +-- *Compare smallest non-equal elements of the two lists*. +modelUnorderedCompare :: Ord a => [a] -> [a] -> Ordering +modelUnorderedCompare as bs = compare (sort as) (sort bs) + +modelUnorderedCompareRefl :: [Int] -> Property +modelUnorderedCompareRefl xs = modelUnorderedCompare xs xs === EQ + +modelUnorderedCompareAntiSymm :: [Int] -> [Int] -> Property +modelUnorderedCompareAntiSymm xs ys = case a of + EQ -> b === EQ + LT -> b === GT + GT -> b === LT + where + a = modelUnorderedCompare xs ys + b = modelUnorderedCompare ys xs + +modelUnorderedCompareTrans :: [Int] -> [Int] -> [Int] -> Property +modelUnorderedCompareTrans xs ys zs = + case (modelUnorderedCompare xs ys, modelUnorderedCompare ys zs) of + (EQ, yz) -> xz === yz + (xy, EQ) -> xz === xy + (LT, LT) -> xz === LT + (GT, GT) -> xz === GT + (LT, GT) -> property True + (GT, LT) -> property True + where + xz = modelUnorderedCompare xs zs + +pUnorderedCompare :: [Int] -> [Int] -> Property +pUnorderedCompare xs ys = + unorderedCompare compare xs ys === modelUnorderedCompare xs ys + +main :: IO () +main = defaultMain [tests] diff --git a/tests/Regressions.hs b/tests/Regressions.hs new file mode 100644 index 0000000..531b162 --- /dev/null +++ b/tests/Regressions.hs @@ -0,0 +1,89 @@ +module Main where + +import Control.Applicative ((<$>)) +import Control.Monad (replicateM) +import qualified Data.HashMap.Strict as HM +import Data.List (delete) +import Data.Maybe +import Test.HUnit (Assertion, assert) +import Test.Framework (Test, defaultMain) +import Test.Framework.Providers.HUnit (testCase) +import Test.Framework.Providers.QuickCheck2 (testProperty) +import Test.QuickCheck + +issue32 :: Assertion +issue32 = assert $ isJust $ HM.lookup 7 m' + where + ns = [0..16] :: [Int] + m = HM.fromList (zip ns (repeat [])) + m' = HM.delete 10 m + +------------------------------------------------------------------------ +-- Issue #39 + +-- First regression + +issue39 :: Assertion +issue39 = assert $ hm1 == hm2 + where + hm1 = HM.fromList ([a, b] `zip` [1, 1 :: Int ..]) + hm2 = HM.fromList ([b, a] `zip` [1, 1 :: Int ..]) + a = (1, -1) :: (Int, Int) + b = (-1, 1) :: (Int, Int) + +-- Second regression + +newtype Keys = Keys [Int] + deriving Show + +instance Arbitrary Keys where + arbitrary = sized $ \l -> do + pis <- replicateM (l+1) positiveInt + return (Keys $ prefixSum pis) + + shrink (Keys ls) = + let l = length ls + in if l == 1 + then [] + else [ Keys (dropAt i ls) | i <- [0..l-1] ] + +positiveInt :: Gen Int +positiveInt = (+1) . abs <$> arbitrary + +prefixSum :: [Int] -> [Int] +prefixSum = loop 0 + where + loop _ [] = [] + loop prefix (l:ls) = let n = l + prefix + in n : loop n ls + +dropAt :: Int -> [a] -> [a] +dropAt _ [] = [] +dropAt i (l:ls) | i == 0 = ls + | otherwise = l : dropAt (i-1) ls + +propEqAfterDelete :: Keys -> Bool +propEqAfterDelete (Keys keys) = + let keyMap = mapFromKeys keys + k = head keys + in HM.delete k keyMap == mapFromKeys (delete k keys) + +mapFromKeys :: [Int] -> HM.HashMap Int () +mapFromKeys keys = HM.fromList (zip keys (repeat ())) + +------------------------------------------------------------------------ +-- * Test list + +tests :: [Test] +tests = + [ + testCase "issue32" issue32 + , testCase "issue39a" issue39 + , testProperty "issue39b" propEqAfterDelete + ] + +------------------------------------------------------------------------ +-- * Test harness + +main :: IO () +main = defaultMain tests diff --git a/tests/Strictness.hs b/tests/Strictness.hs new file mode 100644 index 0000000..aa7e979 --- /dev/null +++ b/tests/Strictness.hs @@ -0,0 +1,192 @@ +{-# LANGUAGE CPP, FlexibleInstances, GeneralizedNewtypeDeriving #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Main (main) where + +import Data.Hashable (Hashable(hashWithSalt)) +import Test.ChasingBottoms.IsBottom +import Test.Framework (Test, defaultMain, testGroup) +import Test.Framework.Providers.QuickCheck2 (testProperty) +import Test.QuickCheck (Arbitrary(arbitrary), Property, (===), (.&&.)) +import Test.QuickCheck.Function +import Test.QuickCheck.Poly (A) +import Data.Maybe (fromMaybe, isJust) +import Control.Arrow (second) +import Control.Monad (guard) +import Data.Foldable (foldl') +#if !MIN_VERSION_base(4,8,0) +import Data.Functor ((<$)) +import Data.Foldable (all) +import Prelude hiding (all) +#endif + +import Data.HashMap.Strict (HashMap) +import qualified Data.HashMap.Strict as HM + +-- Key type that generates more hash collisions. +newtype Key = K { unK :: Int } + deriving (Arbitrary, Eq, Ord, Show) + +instance Hashable Key where + hashWithSalt salt k = hashWithSalt salt (unK k) `mod` 20 + +instance (Arbitrary k, Arbitrary v, Eq k, Hashable k) => + Arbitrary (HashMap k v) where + arbitrary = HM.fromList `fmap` arbitrary + +instance Show (Int -> Int) where + show _ = "" + +instance Show (Int -> Int -> Int) where + show _ = "" + +------------------------------------------------------------------------ +-- * Properties + +------------------------------------------------------------------------ +-- ** Strict module + +pSingletonKeyStrict :: Int -> Bool +pSingletonKeyStrict v = isBottom $ HM.singleton (bottom :: Key) v + +pSingletonValueStrict :: Key -> Bool +pSingletonValueStrict k = isBottom $ (HM.singleton k (bottom :: Int)) + +pLookupDefaultKeyStrict :: Int -> HashMap Key Int -> Bool +pLookupDefaultKeyStrict def m = isBottom $ HM.lookupDefault def bottom m + +pAdjustKeyStrict :: (Int -> Int) -> HashMap Key Int -> Bool +pAdjustKeyStrict f m = isBottom $ HM.adjust f bottom m + +pAdjustValueStrict :: Key -> HashMap Key Int -> Bool +pAdjustValueStrict k m + | k `HM.member` m = isBottom $ HM.adjust (const bottom) k m + | otherwise = case HM.keys m of + [] -> True + (k':_) -> isBottom $ HM.adjust (const bottom) k' m + +pInsertKeyStrict :: Int -> HashMap Key Int -> Bool +pInsertKeyStrict v m = isBottom $ HM.insert bottom v m + +pInsertValueStrict :: Key -> HashMap Key Int -> Bool +pInsertValueStrict k m = isBottom $ HM.insert k bottom m + +pInsertWithKeyStrict :: (Int -> Int -> Int) -> Int -> HashMap Key Int -> Bool +pInsertWithKeyStrict f v m = isBottom $ HM.insertWith f bottom v m + +pInsertWithValueStrict :: (Int -> Int -> Int) -> Key -> Int -> HashMap Key Int + -> Bool +pInsertWithValueStrict f k v m + | HM.member k m = isBottom $ HM.insertWith (const2 bottom) k v m + | otherwise = isBottom $ HM.insertWith f k bottom m + +pFromListKeyStrict :: Bool +pFromListKeyStrict = isBottom $ HM.fromList [(undefined :: Key, 1 :: Int)] + +pFromListValueStrict :: Bool +pFromListValueStrict = isBottom $ HM.fromList [(K 1, undefined)] + +pFromListWithKeyStrict :: (Int -> Int -> Int) -> Bool +pFromListWithKeyStrict f = + isBottom $ HM.fromListWith f [(undefined :: Key, 1 :: Int)] + +-- The strictness properties of 'fromListWith' are not entirely +-- trivial. +-- fromListWith f kvs is strict in the first value seen for each +-- key, but potentially lazy in the rest: the combining function +-- could be lazy in the "new" value. fromListWith must, however, +-- be strict in whatever value is actually inserted into the map. +-- Getting all these properties specified efficiently seems tricky. +-- Since it's not hard, we verify that the converted HashMap has +-- no unforced values. Rather than trying to go into detail for the +-- rest, this test compares the strictness behavior of fromListWith +-- to that of insertWith. The latter should be easier to specify +-- and (if we choose to do so) test thoroughly. +-- +-- We'll fake up a representation of things that are possibly +-- bottom by using Nothing to represent bottom. The combining +-- (partial) function is represented by a "lazy total" function +-- Maybe a -> Maybe a -> Maybe a, along with a function determining +-- whether the result should be non-bottom, Maybe a -> Maybe a -> Bool, +-- indicating how the combining function should behave if neither +-- argument, just the first argument, just the second argument, +-- or both arguments are bottom. It would be quite tempting to +-- just use Maybe A -> Maybe A -> Maybe A, but that would not +-- necessarily be continous. +pFromListWithValueResultStrict :: [(Key, Maybe A)] + -> Fun (Maybe A, Maybe A) A + -> Fun (Maybe A, Maybe A) Bool + -> Property +pFromListWithValueResultStrict lst comb_lazy calc_good_raw + = all (all isJust) recovered .&&. (recovered === recover (fmap recover fake_map)) + where + recovered :: Maybe (HashMap Key (Maybe A)) + recovered = recover (fmap recover real_map) + -- What we get out of the conversion using insertWith + fake_map = foldl' (\m (k,v) -> HM.insertWith real_comb k v m) HM.empty real_list + + -- A continuous version of calc_good_raw + calc_good Nothing Nothing = cgr Nothing Nothing + calc_good Nothing y@(Just _) = cgr Nothing Nothing || cgr Nothing y + calc_good x@(Just _) Nothing = cgr Nothing Nothing || cgr x Nothing + calc_good x y = cgr Nothing Nothing || cgr Nothing y || cgr x Nothing || cgr x y + cgr = curry $ apply calc_good_raw + + -- The Maybe A -> Maybe A -> Maybe A that we're after, representing a + -- potentially less total function than comb_lazy + comb x y = apply comb_lazy (x, y) <$ guard (calc_good x y) + + -- What we get out of the conversion using fromListWith + real_map = HM.fromListWith real_comb real_list + + -- A list that may have actual bottom values in it. + real_list = map (second (fromMaybe bottom)) lst + + -- A genuinely partial function mirroring comb + real_comb x y = fromMaybe bottom $ comb (recover x) (recover y) + + recover :: a -> Maybe a + recover a = a <$ guard (not $ isBottom a) + +------------------------------------------------------------------------ +-- * Test list + +tests :: [Test] +tests = + [ + -- Basic interface + testGroup "HashMap.Strict" + [ testProperty "singleton is key-strict" pSingletonKeyStrict + , testProperty "singleton is value-strict" pSingletonValueStrict + , testProperty "member is key-strict" $ keyStrict HM.member + , testProperty "lookup is key-strict" $ keyStrict HM.lookup + , testProperty "lookupDefault is key-strict" pLookupDefaultKeyStrict + , testProperty "! is key-strict" $ keyStrict (flip (HM.!)) + , testProperty "delete is key-strict" $ keyStrict HM.delete + , testProperty "adjust is key-strict" pAdjustKeyStrict + , testProperty "adjust is value-strict" pAdjustValueStrict + , testProperty "insert is key-strict" pInsertKeyStrict + , testProperty "insert is value-strict" pInsertValueStrict + , testProperty "insertWith is key-strict" pInsertWithKeyStrict + , testProperty "insertWith is value-strict" pInsertWithValueStrict + , testProperty "fromList is key-strict" pFromListKeyStrict + , testProperty "fromList is value-strict" pFromListValueStrict + , testProperty "fromListWith is key-strict" pFromListWithKeyStrict + , testProperty "fromListWith is value-strict" pFromListWithValueResultStrict + ] + ] + +------------------------------------------------------------------------ +-- * Test harness + +main :: IO () +main = defaultMain tests + +------------------------------------------------------------------------ +-- * Utilities + +keyStrict :: (Key -> HashMap Key Int -> a) -> HashMap Key Int -> Bool +keyStrict f m = isBottom $ f bottom m + +const2 :: a -> b -> c -> a +const2 x _ _ = x diff --git a/unordered-containers.cabal b/unordered-containers.cabal new file mode 100644 index 0000000..90cf643 --- /dev/null +++ b/unordered-containers.cabal @@ -0,0 +1,218 @@ +name: unordered-containers +version: 0.2.10.0 +synopsis: Efficient hashing-based container types +description: + Efficient hashing-based container types. The containers have been + optimized for performance critical use, both in terms of large data + quantities and high speed. + . + The declared cost of each operation is either worst-case or + amortized, but remains valid even if structures are shared. +license: BSD3 +license-file: LICENSE +author: Johan Tibell +maintainer: johan.tibell@gmail.com +Homepage: https://github.com/tibbe/unordered-containers +bug-reports: https://github.com/tibbe/unordered-containers/issues +copyright: 2010-2014 Johan Tibell + 2010 Edward Z. Yang +category: Data +build-type: Simple +cabal-version: >=1.10 +extra-source-files: CHANGES.md +tested-with: GHC==8.4.1, GHC==8.2.2, GHC==8.0.2, GHC==7.10.3, GHC==7.8.4 + +flag debug + description: Enable debug support + default: False + +library + exposed-modules: + Data.HashMap.Lazy + Data.HashMap.Strict + Data.HashSet + other-modules: + Data.HashMap.Array + Data.HashMap.Base + Data.HashMap.Strict.Base + Data.HashMap.List + Data.HashMap.Unsafe + Data.HashMap.UnsafeShift + Data.HashSet.Base + + build-depends: + base >= 4.7 && < 5, + deepseq >= 1.1, + hashable >= 1.0.1.1 && < 1.3 + + default-language: Haskell2010 + + other-extensions: + RoleAnnotations, + UnboxedTuples, + ScopedTypeVariables, + MagicHash, + BangPatterns + + ghc-options: -Wall -O2 -fwarn-tabs -ferror-spans + + if impl (ghc < 8.2) + -- This is absolutely necessary (but not sufficient) for correctness due to + -- the referential-transparency-breaking mutability in unsafeInsertWith. See + -- #147 and GHC #13615 for details. The bug was fixed in GHC 8.2. + ghc-options: -feager-blackholing + if flag(debug) + cpp-options: -DASSERTS + +test-suite hashmap-lazy-properties + hs-source-dirs: tests + main-is: HashMapProperties.hs + type: exitcode-stdio-1.0 + + build-depends: + base, + containers >= 0.5.8, + hashable >= 1.0.1.1, + QuickCheck >= 2.4.0.1, + test-framework >= 0.3.3, + test-framework-quickcheck2 >= 0.2.9, + unordered-containers + + default-language: Haskell2010 + ghc-options: -Wall + cpp-options: -DASSERTS + +test-suite hashmap-strict-properties + hs-source-dirs: tests + main-is: HashMapProperties.hs + type: exitcode-stdio-1.0 + + build-depends: + base, + containers >= 0.5.8, + hashable >= 1.0.1.1, + QuickCheck >= 2.4.0.1, + test-framework >= 0.3.3, + test-framework-quickcheck2 >= 0.2.9, + unordered-containers + + default-language: Haskell2010 + ghc-options: -Wall + cpp-options: -DASSERTS -DSTRICT + +test-suite hashset-properties + hs-source-dirs: tests + main-is: HashSetProperties.hs + type: exitcode-stdio-1.0 + + build-depends: + base, + containers >= 0.4, + hashable >= 1.0.1.1, + QuickCheck >= 2.4.0.1, + test-framework >= 0.3.3, + test-framework-quickcheck2 >= 0.2.9, + unordered-containers + + default-language: Haskell2010 + ghc-options: -Wall + cpp-options: -DASSERTS + +test-suite list-tests + hs-source-dirs: tests . + main-is: List.hs + other-modules: + Data.HashMap.List + type: exitcode-stdio-1.0 + + build-depends: + base, + containers >= 0.4, + QuickCheck >= 2.4.0.1, + test-framework >= 0.3.3, + test-framework-quickcheck2 >= 0.2.9 + + default-language: Haskell2010 + ghc-options: -Wall + cpp-options: -DASSERTS + +test-suite regressions + hs-source-dirs: tests + main-is: Regressions.hs + type: exitcode-stdio-1.0 + + build-depends: + base, + hashable >= 1.0.1.1, + HUnit, + QuickCheck >= 2.4.0.1, + test-framework >= 0.3.3, + test-framework-hunit, + test-framework-quickcheck2, + unordered-containers + + default-language: Haskell2010 + ghc-options: -Wall + cpp-options: -DASSERTS + +test-suite strictness-properties + hs-source-dirs: tests + main-is: Strictness.hs + type: exitcode-stdio-1.0 + + build-depends: + base, + ChasingBottoms, + containers >= 0.4.2, + hashable >= 1.0.1.1, + QuickCheck >= 2.4.0.1, + test-framework >= 0.3.3, + test-framework-quickcheck2 >= 0.2.9, + unordered-containers + + default-language: Haskell2010 + ghc-options: -Wall + cpp-options: -DASSERTS + +benchmark benchmarks + -- We cannot depend on the unordered-containers library directly as + -- that creates a dependency cycle. + hs-source-dirs: . benchmarks + + main-is: Benchmarks.hs + type: exitcode-stdio-1.0 + + other-modules: + Data.HashMap.Array + Data.HashMap.Base + Data.HashMap.Lazy + Data.HashMap.Strict + Data.HashMap.Strict.Base + Data.HashMap.Unsafe + Data.HashMap.UnsafeShift + Data.HashSet + Data.HashSet.Base + Util.ByteString + Util.Int + Util.String + + build-depends: + base >= 4.8.0, + bytestring, + containers, + criterion >= 1.0 && < 1.3, + deepseq >= 1.1, + deepseq-generics, + hashable >= 1.0.1.1, + hashmap, + mtl, + random + + default-language: Haskell2010 + ghc-options: -Wall -O2 -rtsopts -fwarn-tabs -ferror-spans + if flag(debug) + cpp-options: -DASSERTS + +source-repository head + type: git + location: https://github.com/tibbe/unordered-containers.git -- 2.30.2