@@ -49,6 +49,8 @@ module Data.HashMap.Internal
4949 , update
5050 , alter
5151 , alterF
52+ , isSubmapOf
53+ , isSubmapOfBy
5254
5355 -- * Combine
5456 -- ** Union
@@ -148,7 +150,7 @@ import qualified Data.Foldable as Foldable
148150import Data.Bifoldable
149151#endif
150152import qualified Data.List as L
151- import GHC.Exts ((==#) , build , reallyUnsafePtrEquality #)
153+ import GHC.Exts ((==#) , build , reallyUnsafePtrEquality #, inline )
152154import Prelude hiding (filter , foldl , foldr , lookup , map , null , pred )
153155import Text.Read hiding (step )
154156
@@ -590,12 +592,12 @@ lookup k m = case lookup# k m of
590592{-# INLINE lookup #-}
591593
592594lookup # :: (Eq k , Hashable k ) => k -> HashMap k v -> (# (# # ) | v # )
593- lookup # k m = lookupCont (\ _ -> (# (# # ) | # )) (\ v _i -> (# | v # )) (hash k) k m
595+ lookup # k m = lookupCont (\ _ -> (# (# # ) | # )) (\ v _i -> (# | v # )) (hash k) k 0 m
594596{-# INLINABLE lookup# #-}
595597
596598#else
597599
598- lookup k m = lookupCont (\ _ -> Nothing ) (\ v _i -> Just v) (hash k) k m
600+ lookup k m = lookupCont (\ _ -> Nothing ) (\ v _i -> Just v) (hash k) k 0 m
599601{-# INLINABLE lookup #-}
600602#endif
601603
@@ -614,7 +616,7 @@ lookup' h k m = case lookupRecordCollision# h k m of
614616 (# | (# a, _i # ) # ) -> Just a
615617{-# INLINE lookup' #-}
616618#else
617- lookup' h k m = lookupCont (\ _ -> Nothing ) (\ v _i -> Just v) h k m
619+ lookup' h k m = lookupCont (\ _ -> Nothing ) (\ v _i -> Just v) h k 0 m
618620{-# INLINABLE lookup' #-}
619621#endif
620622
@@ -649,13 +651,13 @@ lookupRecordCollision h k m = case lookupRecordCollision# h k m of
649651-- into lookupCont because inlining takes care of that.
650652lookupRecordCollision# :: Eq k => Hash -> k -> HashMap k v -> (# (# # ) | (# v , Int # # ) # )
651653lookupRecordCollision# h k m =
652- lookupCont (\ _ -> (# (# # ) | # )) (\ v (I # i) -> (# | (# v, i # ) # )) h k m
654+ lookupCont (\ _ -> (# (# # ) | # )) (\ v (I # i) -> (# | (# v, i # ) # )) h k 0 m
653655-- INLINABLE to specialize to the Eq instance.
654656{-# INLINABLE lookupRecordCollision# #-}
655657
656658#else /* GHC < 8.2 so there are no unboxed sums */
657659
658- lookupRecordCollision h k m = lookupCont (\ _ -> Absent ) Present h k m
660+ lookupRecordCollision h k m = lookupCont (\ _ -> Absent ) Present h k 0 m
659661{-# INLINABLE lookupRecordCollision #-}
660662#endif
661663
@@ -667,6 +669,10 @@ lookupRecordCollision h k m = lookupCont (\_ -> Absent) Present h k m
667669-- so we can be representation-polymorphic in the result type. Since
668670-- this whole thing is always inlined, we don't have to worry about
669671-- any extra CPS overhead.
672+ --
673+ -- The @Int@ argument is the offset of the subkey in the hash. When looking up
674+ -- keys at the top-level of a hashmap, the offset should be 0. When looking up
675+ -- keys at level @n@ of a hashmap, the offset should be @n * bitsPerSubkey@.
670676lookupCont ::
671677#if __GLASGOW_HASKELL__ >= 802
672678 forall rep (r :: TYPE rep ) k v.
@@ -677,8 +683,10 @@ lookupCont ::
677683 => ((# # ) -> r) -- Absent continuation
678684 -> (v -> Int -> r) -- Present continuation
679685 -> Hash -- The hash of the key
680- -> k -> HashMap k v -> r
681- lookupCont absent present ! h0 ! k0 ! m0 = go h0 k0 0 m0
686+ -> k
687+ -> Int -- The offset of the subkey in the hash.
688+ -> HashMap k v -> r
689+ lookupCont absent present ! h0 ! k0 ! s0 ! m0 = go h0 k0 s0 m0
682690 where
683691 go :: Eq k => Hash -> k -> Int -> HashMap k v -> r
684692 go ! _ ! _ ! _ Empty = absent (# # )
@@ -1409,6 +1417,116 @@ alterFEager f !k m = (<$> f mv) $ \fres ->
14091417{-# INLINABLE alterFEager #-}
14101418#endif
14111419
1420+ -- | /O(n*log m)/ Inclusion of maps. A map is included in another map if the keys
1421+ -- are subsets and the corresponding values are equal:
1422+ --
1423+ -- > isSubmapOf m1 m2 = keys m1 `isSubsetOf` keys m2 &&
1424+ -- > and [ v1 == v2 | (k1,v1) <- toList m1; let v2 = m2 ! k1 ]
1425+ --
1426+ -- ==== __Examples__
1427+ --
1428+ -- >>> fromList [(1,'a')] `isSubmapOf` fromList [(1,'a'),(2,'b')]
1429+ -- True
1430+ --
1431+ -- >>> fromList [(1,'a'),(2,'b')] `isSubmapOf` fromList [(1,'a')]
1432+ -- False
1433+ isSubmapOf :: (Eq k , Hashable k , Eq v ) => HashMap k v -> HashMap k v -> Bool
1434+ isSubmapOf = (inline isSubmapOfBy) (==)
1435+ {-# INLINABLE isSubmapOf #-}
1436+
1437+ -- | /O(n*log m)/ Inclusion of maps with value comparison. A map is included in
1438+ -- another map if the keys are subsets and if the comparison function is true
1439+ -- for the corresponding values:
1440+ --
1441+ -- > isSubmapOfBy cmpV m1 m2 = keys m1 `isSubsetOf` keys m2 &&
1442+ -- > and [ v1 `cmpV` v2 | (k1,v1) <- toList m1; let v2 = m2 ! k1 ]
1443+ --
1444+ -- ==== __Examples__
1445+ --
1446+ -- >>> isSubmapOfBy (<=) (fromList [(1,'a')]) (fromList [(1,'b'),(2,'c')])
1447+ -- True
1448+ --
1449+ -- >>> isSubmapOfBy (<=) (fromList [(1,'b')]) (fromList [(1,'a'),(2,'c')])
1450+ -- False
1451+ isSubmapOfBy :: (Eq k , Hashable k ) => (v1 -> v2 -> Bool ) -> HashMap k v1 -> HashMap k v2 -> Bool
1452+ -- For maps without collisions the complexity is O(n*log m), where n is the size
1453+ -- of m1 and m the size of m2: the inclusion operation visits every leaf in m1 at least once.
1454+ -- For each leaf in m1, it looks up the key in m2.
1455+ --
1456+ -- The worst case complexity is O(n*m). The worst case is when both hashmaps m1
1457+ -- and m2 are collision nodes for the same hash. Since collision nodes are
1458+ -- unsorted arrays, it requires for every key in m1 a linear search to to find a
1459+ -- matching key in m2, hence O(n*m).
1460+ isSubmapOfBy comp ! m1 ! m2 = go 0 m1 m2
1461+ where
1462+ -- An empty map is always a submap of any other map.
1463+ go _ Empty _ = True
1464+
1465+ -- If the second map is empty and the first is not, it cannot be a submap.
1466+ go _ _ Empty = False
1467+
1468+ -- If the first map contains only one entry, lookup the key in the second map.
1469+ go s (Leaf h1 (L k1 v1)) t2 = lookupCont (\ _ -> False ) (\ v2 _ -> comp v1 v2) h1 k1 s t2
1470+
1471+ -- In this case, we need to check that for each x in ls1, there is a y in
1472+ -- ls2 such that x `comp` y. This is the worst case complexity-wise since it
1473+ -- requires a O(m*n) check.
1474+ go _ (Collision h1 ls1) (Collision h2 ls2) =
1475+ h1 == h2 && subsetArray comp ls1 ls2
1476+
1477+ -- In this case, we only need to check the entries in ls2 with the hash h1.
1478+ go s t1@ (Collision h1 _) (BitmapIndexed b ls2)
1479+ | b .&. m == 0 = False
1480+ | otherwise =
1481+ go (s+ bitsPerSubkey) t1 (A. index ls2 (sparseIndex b m))
1482+ where m = mask h1 s
1483+
1484+ -- Similar to the previous case we need to traverse l2 at the index for the hash h1.
1485+ go s t1@ (Collision h1 _) (Full ls2) =
1486+ go (s+ bitsPerSubkey) t1 (A. index ls2 (index h1 s))
1487+
1488+ -- In cases where the first and second map are BitmapIndexed or Full,
1489+ -- traverse down the tree at the appropriate indices.
1490+ go s (BitmapIndexed b1 ls1) (BitmapIndexed b2 ls2) =
1491+ submapBitmapIndexed (go (s+ bitsPerSubkey)) b1 ls1 b2 ls2
1492+ go s (BitmapIndexed b1 ls1) (Full ls2) =
1493+ submapBitmapIndexed (go (s+ bitsPerSubkey)) b1 ls1 fullNodeMask ls2
1494+ go s (Full ls1) (Full ls2) =
1495+ submapBitmapIndexed (go (s+ bitsPerSubkey)) fullNodeMask ls1 fullNodeMask ls2
1496+
1497+ -- Collision and Full nodes always contain at least two entries. Hence it
1498+ -- cannot be a map of a leaf.
1499+ go _ (Collision {}) (Leaf {}) = False
1500+ go _ (BitmapIndexed {}) (Leaf {}) = False
1501+ go _ (Full {}) (Leaf {}) = False
1502+ go _ (BitmapIndexed {}) (Collision {}) = False
1503+ go _ (Full {}) (Collision {}) = False
1504+ go _ (Full {}) (BitmapIndexed {}) = False
1505+ {-# INLINABLE isSubmapOfBy #-}
1506+
1507+ -- | /O(min n m))/ Checks if a bitmap indexed node is a submap of another.
1508+ submapBitmapIndexed :: (HashMap k v1 -> HashMap k v2 -> Bool ) -> Bitmap -> A. Array (HashMap k v1 ) -> Bitmap -> A. Array (HashMap k v2 ) -> Bool
1509+ submapBitmapIndexed comp ! b1 ! ary1 ! b2 ! ary2 = subsetBitmaps && go 0 0 (b1Orb2 .&. negate b1Orb2)
1510+ where
1511+ go :: Int -> Int -> Bitmap -> Bool
1512+ go ! i ! j ! m
1513+ | m > b1Orb2 = True
1514+
1515+ -- In case a key is both in ary1 and ary2, check ary1[i] <= ary2[j] and
1516+ -- increment the indices i and j.
1517+ | b1Andb2 .&. m /= 0 = comp (A. index ary1 i) (A. index ary2 j) &&
1518+ go (i+ 1 ) (j+ 1 ) (m `unsafeShiftL` 1 )
1519+
1520+ -- In case a key occurs in ary1, but not ary2, only increment index j.
1521+ | b2 .&. m /= 0 = go i (j+ 1 ) (m `unsafeShiftL` 1 )
1522+
1523+ -- In case a key neither occurs in ary1 nor ary2, continue.
1524+ | otherwise = go i j (m `unsafeShiftL` 1 )
1525+
1526+ b1Andb2 = b1 .&. b2
1527+ b1Orb2 = b1 .|. b2
1528+ subsetBitmaps = b1Orb2 == b2
1529+ {-# INLINABLE submapBitmapIndexed #-}
14121530
14131531------------------------------------------------------------------------
14141532-- * Combine
@@ -2076,6 +2194,13 @@ updateOrConcatWithKey f ary1 ary2 = A.run $ do
20762194 return mary
20772195{-# INLINABLE updateOrConcatWithKey #-}
20782196
2197+ -- | /O(n*m)/ Check if the first array is a subset of the second array.
2198+ subsetArray :: Eq k => (v1 -> v2 -> Bool ) -> A. Array (Leaf k v1 ) -> A. Array (Leaf k v2 ) -> Bool
2199+ subsetArray cmpV ary1 ary2 = A. length ary1 <= A. length ary2 && A. all inAry2 ary1
2200+ where
2201+ inAry2 (L k1 v1) = lookupInArrayCont (\ _ -> False ) (\ v2 _ -> cmpV v1 v2) k1 ary2
2202+ {-# INLINE inAry2 #-}
2203+
20792204------------------------------------------------------------------------
20802205-- Manually unrolled loops
20812206
0 commit comments