-- | An implementation of Tarjan's UNION-FIND algorithm.  (Robert E
-- Tarjan. \"Efficiency of a Good But Not Linear Set Union Algorithm\", JACM
-- 22(2), 1975)
--
-- The algorithm implements three operations efficiently (all amortised
-- @O(1)@):
--
--  1. Check whether two elements are in the same equivalence class.
--
--  2. Create a union of two equivalence classes.
--
--  3. Look up the descriptor of the equivalence class.
-- 
-- The implementation is based on mutable references.  Each
-- equivalence class has exactly one member that serves as its
-- representative element.  Every element either is the representative
-- element of its equivalence class or points to another element in
-- the same equivalence class.  Equivalence testing thus consists of
-- following the pointers to the representative elements and then
-- comparing these for identity.
--
-- The algorithm performs lazy path compression.  That is, whenever we
-- walk along a path greater than length 1 we automatically update the
-- pointers along the path to directly point to the representative
-- element.  Consequently, future lookups of those elements will have a
-- path length of at most 1.
--
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Data.UnionFind.ST
  ( Point, fresh, repr, union, union', equivalent, redundant,
    descriptor, setDescriptor, modifyDescriptor )
where

import Control.Applicative
import Control.Monad ( when )
import Control.Monad.ST
import Data.STRef

-- | The abstract type of an element of the sets we work on.  It is
-- parameterised over the type of the descriptor.
--
-- The derived 'Eq' instance compares the underlying mutable
-- references, i.e. it tests whether two values are the very same
-- point.  It does /not/ test membership of the same equivalence
-- class; use 'equivalent' for that.
newtype Point s a = Pt (STRef s (Link s a)) deriving Eq

-- | The contents of a point's reference cell: either the class
-- information itself (the point is the representative) or a pointer
-- to another point of the same class.
data Link s a
    = Info {-# UNPACK #-} !(STRef s (Info a))
      -- ^ This is the descriptive element of the equivalence class.
    | Link {-# UNPACK #-} !(Point s a)
      -- ^ Pointer to some other element of the equivalence class.
    deriving Eq

-- | The bookkeeping record stored for an equivalence class; it is
-- reached through the 'Info' link of the class's representative.
data Info a = MkInfo
  { weight :: {-# UNPACK #-} !Int
    -- ^ The size of the equivalence class, used by 'union'.
  , descr  :: a
    -- ^ The descriptor attached to the equivalence class.
  } deriving Eq

-- | /O(1)/. Create a fresh point and return it.  A fresh point is in
-- the equivalence class that contains only itself; that class has
-- weight 1 and carries the given descriptor.
fresh :: a -> ST s (Point s a)
fresh desc = do
  info <- newSTRef (MkInfo { weight = 1, descr = desc })
  -- A new point is its own representative, so its cell holds the
  -- class information directly.
  l <- newSTRef (Info info)
  return (Pt l)

-- | Amortised /O(1)/. @repr point@ returns the representative point
-- of @point@'s equivalence class.
--
-- This function performs the path compression: after it returns,
-- @point@ is either the representative itself or points directly to
-- it.
repr :: Point s a -> ST s (Point s a)
repr point@(Pt l) = do
  link <- readSTRef l
  case link of
    -- @point@ holds the class information, so it is the representative.
    Info _ -> return point
    Link pt'@(Pt l') -> do
      pt'' <- repr pt'
      when (pt'' /= pt') $ do
        -- At this point we know that @pt'@ is not the representative
        -- element of @point@'s equivalence class.  Therefore @pt'@'s
        -- link must be of the form @Link r@.  We write this same
        -- value into @point@'s link reference and thereby perform
        -- path compression.
        link' <- readSTRef l'
        writeSTRef l link'
      return pt''

-- | Return the reference to the point's equivalence class's
-- descriptor.
--
-- Paths of length 0 and 1 are resolved by reading the link cells
-- directly.  Only longer paths go through 'repr' (which compresses
-- the path) before retrying.
descrRef :: Point s a -> ST s (STRef s (Info a))
descrRef point@(Pt link_ref) = do
  link <- readSTRef link_ref
  case link of
    -- The point is the representative.
    Info info -> return info
    Link (Pt link'_ref) -> do
      link' <- readSTRef link'_ref
      case link' of
        -- The parent is the representative.
        Info info -> return info
        -- Path longer than 1: find the representative, then look up
        -- its information.
        _ -> descrRef =<< repr point

-- | /O(1)/. Return the descriptor associated with argument point's
-- equivalence class.
descriptor :: Point s a -> ST s a
descriptor point = descr <$> (readSTRef =<< descrRef point)

-- | /O(1)/. Replace the descriptor of the point's equivalence class
-- with the second argument.
--
-- The new descriptor itself is stored unevaluated.
setDescriptor :: Point s a -> a -> ST s ()
setDescriptor point new_descr = do
  r <- descrRef point
  i <- readSTRef r
  -- Store the updated record in weak head normal form.  A lazy
  -- 'modifySTRef' would leave a chain of pending record updates in the
  -- reference when the descriptor is set repeatedly without being read.
  writeSTRef r $! i { descr = new_descr }

-- | /O(1)/. Apply the function to the descriptor of the point's
-- equivalence class and store the result as the new descriptor.
--
-- The application of the function is stored unevaluated; it is only
-- run when the descriptor is demanded.
modifyDescriptor :: Point s a -> (a -> a) -> ST s ()
modifyDescriptor point f = do
  r <- descrRef point
  i <- readSTRef r
  -- Store the updated record in weak head normal form so that
  -- repeated modifications do not pile up pending record updates in
  -- the reference.  The @descr@ field itself stays lazy.
  writeSTRef r $! i { descr = f (descr i) }

-- | /O(1)/. Join the equivalence classes of the points.  The
-- resulting equivalence class will get the descriptor of the second
-- argument.
--
-- If both points already belong to the same equivalence class,
-- nothing is changed.
union :: Point s a -> Point s a -> ST s ()
union p1 p2 = union' p1 p2 keepSecond
  where
    -- Ignore the first class's descriptor and keep the second one.
    keepSecond _ d2 = return d2

-- | Like 'union', but sets the descriptor returned from the callback.
--
-- The callback receives the descriptor of the first point's class and
-- the descriptor of the second point's class, in that order.  The
-- intention is to keep the descriptor of the second argument to the
-- callback, but the callback might adjust the information of the
-- descriptor or perform side effects.
--
-- If both points already belong to the same equivalence class, the
-- callback is not run and nothing is changed.
union' :: Point s a -> Point s a -> (a -> a -> ST s a) -> ST s ()
union' p1 p2 update = do
  point1@(Pt link_ref1) <- repr p1
  point2@(Pt link_ref2) <- repr p2
  -- Linking a representative to itself would create a cycle, so only
  -- proceed when the two classes are really different.
  when (point1 /= point2) $ do
    info_ref1 <- rootInfo link_ref1
    info_ref2 <- rootInfo link_ref2
    MkInfo w1 d1 <- readSTRef info_ref1
    MkInfo w2 d2 <- readSTRef info_ref2
    d2' <- update d1 d2
    let merged = MkInfo (w1 + w2) d2'
    -- Make the smaller tree a subtree of the bigger one.  The idea
    -- is this: We increase the path length of one set by one.
    -- Assuming all elements are accessed equally often, this means
    -- the penalty is smaller if we do it for the smaller set of the
    -- two.
    if w1 >= w2
      then do
        writeSTRef link_ref2 (Link point1)
        writeSTRef info_ref1 merged
      else do
        writeSTRef link_ref1 (Link point2)
        writeSTRef info_ref2 merged
  where
    -- Fetch the class information from a representative's link cell.
    -- Written as a total function rather than a failable @do@ pattern
    -- so that no 'MonadFail' instance for 'ST' is needed.  The 'Link'
    -- case cannot occur for a point returned by 'repr'.
    rootInfo :: STRef t (Link t b) -> ST t (STRef t (Info b))
    rootInfo ref = do
      link <- readSTRef ref
      case link of
        Info info_ref -> return info_ref
        Link _ ->
          error "Data.UnionFind.ST.union': representative point has no Info link"

-- | /O(1)/. Return @True@ if both points belong to the same
-- equivalence class.
--
-- Two points are in the same class exactly when they have the same
-- representative, so both representatives are looked up (first
-- @p1@'s, then @p2@'s) and compared for identity.
equivalent :: Point s a -> Point s a -> ST s Bool
equivalent p1 p2 = (==) <$> repr p1 <*> repr p2

-- | /O(1)/. Returns @True@ for all but one element of an equivalence
-- class.  That is, if @ps = [p1, .., pn]@ are all in the same
-- equivalence class, then the following assertion holds.
--
-- > do rs <- mapM redundant ps
-- >    assert (length (filter (==False) rs) == 1)
--
-- It is unspecified for which element the function returns @False@,
-- so be really careful when using this.
redundant :: Point s a -> ST s Bool
redundant (Pt link_r) = do
  link <- readSTRef link_r
  -- Only the point whose cell holds the class information is
  -- reported as non-redundant.
  return $ case link of
    Info _ -> False
    Link _ -> True