Business Logic for an Overly Complicated Feedreader

It turns out I don´t have to write much in the way of comments—the source file is already quite well documented.

> {-# LANGUAGE KindSignatures #-}
> 
> module Beuteltier
>        ( -- * Forcing (Executing) 'Thunk's
>          forceAllThunks
>        , WithObjects
>        , forceThunk
>        , resetAllThunks
>        , resetThunk
>          -- * Higher level interactions with a 'Beutel'
>        , replace
>        , eqTo
>        , update
>        , forceAllThunks'
>        , resetAllThunks'
>        , module Beuteltier.Util
>        , module Beuteltier.Types.Common
>        ) where
> 
> import Beuteltier.Types.Common
> import Beuteltier.Util
> 
> import Data.Map (Map)
> import qualified Data.Map as Map
> 
> import Control.Lens
> 
> import qualified Data.ByteString.Lazy as LBS
> import qualified Data.ByteString as BS
> 
> import qualified Data.Text as T
> import qualified Data.Text.IO as T
> import qualified Data.Text.Lazy as TL
> 
> import System.FilePath
> import System.Directory
> import System.Posix.Files
> import System.Posix.Temp
> 
> import System.Environment
> import System.IO
> import GHC.IO.Handle
> import System.Process as P
> import System.Exit
> 
> import Control.Concurrent
> import Control.Applicative
> import Control.Monad.Morph
> import Control.Monad.Trans.State
> import Control.Monad.Writer
> import Control.Monad.Trans.Resource
> import Control.Monad (liftM)
> 
> import Data.Time.Clock
> 
> import Data.BoolExpr

The distinguishing feature of our Overly Complicated Feedreader™ is it´s support for Thunks. It is thus reasonable to expect, that we have some functions to actually interact with them. Most striking in that hypothetical set of functions would be one that executes all thunks associated with a single ObjectGen and return a new one with the content generated by Thunks filled in.

> forceAllThunks :: (MonadIO f, MonadResource f)
>                   => (Thunk -> Bool) -- ^ Select which 'Thunk's to force
>                   -> ObjectGen f -> f (ObjectGen f)
> -- ^ Force all thunks in place and update '_oContent'
> --
> --   The reason we require a 'MonadResource' instance is that we would like to store our expensive to hold in RAM 'SubObject' contents in temporary files.
> forceAllThunks pred = flip alter $ do
>   pureThunks <- use oThunks >>= lift >>= mapM lift
>   (newThunks, objectResults) <- mapAndUnzipM forceThunk' pureThunks
>   assign oThunks $ return $ map return newThunks
>   oContent %= liftM (<> mconcat objectResults)
>   where
>     forceThunk' thunk
>       | pred thunk = forceThunk thunk
>       | otherwise = return (thunk, Map.empty)

forceAllThunks' (resetAllThunks' below, too) is tainted by the evil of generateObject but included for convenience.

> forceAllThunks' :: (MonadIO f, MonadResource f) => (Thunk -> Bool) -> StateT Object f ()
> -- ^ Version of 'forceAllThunks' suitable for use with 'update'
> forceAllThunks' pred = get >>= lift . forceAllThunks pred . liftGen >>= lift . generateObject >>= put
> 
> -- | Internal helper to track computations creating 'SubObject's
> type WithObjects (n :: * -> *) (m :: * -> *) = WriterT (Map SubObjectName (n SubObject)) m
> 
> forceThunk_ :: (MonadIO m, MonadResource m) => Thunk -> WithObjects (ResourceT IO) m Thunk
> -- ^ Make sure the 'ThunkState' contained within a 'Thunk' is 'Executed'
> forceThunk_ = flip alter $ do
>   -- (result, subObjects) <- liftIO $ runWriterT $ parseThunk thunk
>   -- writer ((), subObjects)
>   (result, subObjects) <- listen . hoist (hoist liftResourceT) . lift . parseThunk =<< get
>   tState .= Executed (Map.keys subObjects) result
> 
> forceThunk :: (MonadResource m, MonadResource n) => Thunk -> m (Thunk, Map SubObjectName (n SubObject))
> -- ^ Force a 'Thunk' and return it in 'Executed' state together with the 'SubObject's it created during execution
> forceThunk thunk = liftM (_2 %~ fmap liftResourceT) $ (runWriterT . forceThunk_) thunk

Quite often we want to undue the harm done by forceAllThunks (to save space, usually).

> resetAllThunks :: Monad f
>                   => (Thunk -> Bool) -- ^ Select which 'Thunk's to reset
>                   -> ObjectGen f -> f (ObjectGen f)
> -- ^ Undoes 'forceAllThunks':
> --
> --   prop> forceAllThunks (const True) obj >>= resetAllThunks (const True) >>= forceAllThunks (const True) = forceAllThunks (const True) obj
> --
> --   This inevitably drops information ('ThunkResult's for one).
> --
> --   In the case where 'forceAllThunks' does not drop information (i.e.: no 'SubObjectName' collisions ocurr) the following, stronger property holds:
> --
> --   prop> forceAllThunks (const True) obj >>= resetAllThunks (const True) = return obj
> resetAllThunks pred = flip alter $ do
>   thunks <- liftM (map lift) (use oThunks >>= lift) >>= sequence
>   let
>     (subObjectNames, newThunks) = over _1 concat $ unzip $ map resetThunk' thunks
>   oThunks .= return (map return newThunks)
>   oContent %= (>>= return . Map.filterWithKey (\k _ -> k `notElem` subObjectNames))
>   where
>     resetThunk' thunk
>       | pred thunk = resetThunk thunk
>       | otherwise = ([], thunk)
> 
> resetAllThunks' :: (MonadIO f, MonadResource f) => (Thunk -> Bool) -> StateT Object f ()
> -- ^ Version of 'resetAllThunks' suitable for use with 'update'
> resetAllThunks' pred = get >>= lift . resetAllThunks pred . liftGen >>= lift . generateObject >>= put
> 
> resetThunk :: Thunk -> ([SubObjectName], Thunk)
> -- ^ Reset a thunk and return the 'SubObjectName's of the 'SubObject's it once created.
> --   This forgets information.
> resetThunk thunk = case thunk ^. tState of
>   NotExecuted -> ([], thunk)
>   Executed created _ -> (created, set tState NotExecuted thunk)
> 
> parseThunk :: Thunk -> WithObjects (ResourceT IO) (ResourceT IO) ThunkResult
> -- ^ Generate a runnable action from a 'Thunk'
> --
> --   Regarding the "inner" and "outer" 'Monad' here being 'IO': We have not, at time of forcing, a neccessary connection to our backstore and thus cannot expect the monads to be anything else.
> parseThunk thunk = do
>   tmpDirName <- liftIO getTemporaryDirectory
>   progName <- liftIO getProgName
>   let
>     tmpDirName' = tmpDirName </> progName
>   (_, tmpDir) <- allocate (mkdtemp tmpDirName') removeDirectoryRecursive
>   let exec = tmpDir </> "exec"
>       out = tmpDir </> "out"
>   result <- liftIO $ do
>     createDirectory out
>     LBS.writeFile exec script
>     setFileMode exec $ foldl unionFileModes nullFileMode [ownerReadMode, ownerExecuteMode]
>     (Just std_in, Just std_out, Just std_err, ph) <- createProcess $ (P.proc exec []) { cwd = Just out, std_in = CreatePipe, std_out = CreatePipe, std_err = CreatePipe }
>     hClose std_in
>     hSetBinaryMode std_out True
>     hSetBinaryMode std_err True
>     std_out `sendTo` stdout
>     std_err `sendTo` stderr
>     construct $ do
>       rOutStd <~ TL.fromStrict <$> liftIO (T.hGetContents std_out) -- Yes, sadly we have to be strict here
>       rOutErr <~ TL.fromStrict <$> liftIO (T.hGetContents std_err)
>       rExit <~ toNum <$> liftIO (waitForProcess ph)
>   outputFiles <- liftIO $ getDirectoryContents out
>   let
>     outputFiles' = filter fileFilter outputFiles
>     fileFilter = and . (<*>) [(/=) ".", (/=) ".."] . pure . takeFileName
>   mapM_ tell =<< mapM (liftResourceT . toSubObject) outputFiles'
>   return result
>   where
>     script = thunk ^. tScript
>     toSubObject :: FilePath -> ResIO (Map SubObjectName (ResIO SubObject))
>     -- ^ Using 'ResourceT' provides us with the guarantee, that the 'FilePath' we´re referring to should still exist when we actually try to get the 'SubObject'´s contents
>     toSubObject name =  fmap (Map.singleton name' . return) $ construct $ do
>       sContent <~ liftIO (LBS.readFile name)
>       sUpdates <~ pure <$> liftIO getCurrentTime
>       where
>         name' = takeFileName name
>     sendTo input output = do
>       input' <- hDuplicate input
>       forkIO $ do
>         hSetBuffering input' NoBuffering
>         LBS.hGetContents input' >>= LBS.hPutStr output
>       return ()
>     toNum :: Num a => ExitCode -> a
>     toNum ExitSuccess = 0
>     toNum (ExitFailure i) = fromInteger $ toInteger i

We provide update, a convenience function for high-level interactions (though costly on large sets of equivalent objects (which should not exist due to nubbing)).

> eqTo :: Monad f => Object -> SearchQuery f
> -- ^ @eqTo o@ constructs a 'SearchQuery' that matches all 'Object's 'Equivalent' to @o@
> --
> --   This is costly because it calls 'generateObject' on the contents of the entire 'Beutel'.
> eqTo o = BConst ((>>= return . (~~) o) . generateObject)
> 
> update :: Beutel f => SearchQuery f -> StateT Object f a -> f ()
> -- ^ @update search action@ replaces /all/ 'Object's matching @search@ within the 'Beutel' by versions of themselves modified by applying @action@.
> --
> --   Does not handle '_sUpdates'.
> --
> --   This is costly because it calls 'generateObject' on the contents of the entire 'Beutel' /and/ all results of @search@ (in order to use 'Eq' on 'Object's to delete the results of the initial 'search').
> update query alteration = do
>   matches <- search query
>   matches' <- mapM generateObject matches
>   delete $ BConst ((>>= return . (`elem` matches')) . generateObject)
>   mapM_ (\o -> alter o alteration >>= insert) matches'
>   return ()

dirty-haskell.org: Business Logic for an Overly Complicated Feedreader

It's not crazy — it's having fun with types.