-----------------------------------------------------------------------------
-- |
-- Module      :  Distribution.Simple.PreProcess.Unlit
-- Copyright   :  ...
--
-- Maintainer  :  cabal-devel@haskell.org
-- Portability :  portable
--
-- Remove the \"literal\" markups from a Haskell source file, including
-- \"@>@\", \"@\\begin{code}@\", \"@\\end{code}@\", and \"@#@\"

-- This version is interesting because instead of striping comment lines, it
-- turns them into "-- " style comments. This allows using haddock markup
-- in literate scripts without having to use "> --" prefix.

module Distribution.Simple.PreProcess.Unlit (unlit,plain) where

import Prelude ()
import Distribution.Compat.Prelude

import Data.List (mapAccumL)

data Classified = BirdTrack String | Blank String | Ordinary String
                | Line !Int String | CPP String
                | BeginCode | EndCode
                -- output only:
                | Error String | Comment String

-- | No unliteration.
plain :: String -> String -> String
plain :: String -> String -> String
plain _ hs :: String
hs = String
hs

classify :: String -> Classified
classify :: String -> Classified
classify ('>':s :: String
s) = String -> Classified
BirdTrack String
s
classify ('#':s :: String
s) = case String -> [String]
tokens String
s of
                     (line :: String
line:file :: String
file:_) | (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit String
line
                                  Bool -> Bool -> Bool
&& String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
file Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 2
                                  Bool -> Bool -> Bool
&& String -> Char
forall a. [a] -> a
head String
file Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '"'
                                  Bool -> Bool -> Bool
&& String -> Char
forall a. [a] -> a
last String
file Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '"'
                                -- this shouldn't fail as we tested for 'all isDigit'
                                -> Int -> String -> Classified
Line (Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe (String -> Int
forall a. HasCallStack => String -> a
error (String -> Int) -> String -> Int
forall a b. (a -> b) -> a -> b
$ "panic! read @Int " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
line) (Maybe Int -> Int) -> Maybe Int -> Int
forall a b. (a -> b) -> a -> b
$ String -> Maybe Int
forall a. Read a => String -> Maybe a
readMaybe String
line) (String -> String
forall a. [a] -> [a]
tail (String -> String
forall a. [a] -> [a]
init String
file)) -- TODO:eradicateNoParse
                     _          -> String -> Classified
CPP String
s
  where tokens :: String -> [String]
tokens = (String -> Maybe (String, String)) -> String -> [String]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr ((String -> Maybe (String, String)) -> String -> [String])
-> (String -> Maybe (String, String)) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ \str :: String
str -> case ReadS String
lex String
str of
                                   (t :: String
t@(_:_), str' :: String
str'):_ -> (String, String) -> Maybe (String, String)
forall a. a -> Maybe a
Just (String
t, String
str')
                                   _                 -> Maybe (String, String)
forall a. Maybe a
Nothing
classify ('\\':s :: String
s)
  | "begin{code}" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
s = Classified
BeginCode
  | "end{code}"   String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
s = Classified
EndCode
classify s :: String
s | (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isSpace String
s       = String -> Classified
Blank String
s
classify s :: String
s                       = String -> Classified
Ordinary String
s

-- So the weird exception for comment indenting is to make things work with
-- haddock, see classifyAndCheckForBirdTracks below.
unclassify :: Bool -> Classified -> String
unclassify :: Bool -> Classified -> String
unclassify _     (BirdTrack s :: String
s) = ' 'Char -> String -> String
forall a. a -> [a] -> [a]
:String
s
unclassify _     (Blank s :: String
s)     = String
s
unclassify _     (Ordinary s :: String
s)  = String
s
unclassify _     (Line n :: Int
n file :: String
file) = "# " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
n String -> String -> String
forall a. [a] -> [a] -> [a]
++ " " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
file
unclassify _     (CPP s :: String
s)       = '#'Char -> String -> String
forall a. a -> [a] -> [a]
:String
s
unclassify True  (Comment "")  = "  --"
unclassify True  (Comment s :: String
s)   = "  -- " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s
unclassify False (Comment "")  = "--"
unclassify False (Comment s :: String
s)   = "-- " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s
unclassify _     _             = String
forall a. a
internalError

-- | 'unlit' takes a filename (for error reports), and transforms the
--   given string, to eliminate the literate comments from the program text.
unlit :: FilePath -> String -> Either String String
unlit :: String -> String -> Either String String
unlit file :: String
file input :: String
input =
  let (usesBirdTracks :: Bool
usesBirdTracks, classified :: [Classified]
classified) = [String] -> (Bool, [Classified])
classifyAndCheckForBirdTracks
                                   ([String] -> (Bool, [Classified]))
-> (String -> [String]) -> String -> (Bool, [Classified])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
inlines
                                   (String -> (Bool, [Classified])) -> String -> (Bool, [Classified])
forall a b. (a -> b) -> a -> b
$ String
input
   in ([Classified] -> Either String String)
-> (String -> Either String String)
-> Either [Classified] String
-> Either String String
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (String -> Either String String
forall a b. a -> Either a b
Left (String -> Either String String)
-> ([Classified] -> String) -> [Classified] -> Either String String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
unlines ([String] -> String)
-> ([Classified] -> [String]) -> [Classified] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Classified -> String) -> [Classified] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (Bool -> Classified -> String
unclassify Bool
usesBirdTracks))
              String -> Either String String
forall a b. b -> Either a b
Right
    (Either [Classified] String -> Either String String)
-> ([Classified] -> Either [Classified] String)
-> [Classified]
-> Either String String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Classified] -> Either [Classified] String
checkErrors
    ([Classified] -> Either [Classified] String)
-> ([Classified] -> [Classified])
-> [Classified]
-> Either [Classified] String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Classified] -> [Classified]
reclassify
    ([Classified] -> Either String String)
-> [Classified] -> Either String String
forall a b. (a -> b) -> a -> b
$ [Classified]
classified

  where
    -- So haddock requires comments and code to align, since it treats comments
    -- as following the layout rule. This is a pain for us since bird track
    -- style literate code typically gets indented by two since ">" is replaced
    -- by " " and people usually use one additional space of indent ie
    -- "> then the code". On the other hand we cannot just go and indent all
    -- the comments by two since that does not work for latex style literate
    -- code. So the hacky solution we use here is that if we see any bird track
    -- style code then we'll indent all comments by two, otherwise by none.
    -- Of course this will not work for mixed latex/bird track .lhs files but
    -- nobody does that, it's silly and specifically recommended against in the
    -- H98 unlit spec.
    --
    classifyAndCheckForBirdTracks :: [String] -> (Bool, [Classified])
classifyAndCheckForBirdTracks =
      ((Bool -> String -> (Bool, Classified))
 -> Bool -> [String] -> (Bool, [Classified]))
-> Bool
-> (Bool -> String -> (Bool, Classified))
-> [String]
-> (Bool, [Classified])
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Bool -> String -> (Bool, Classified))
-> Bool -> [String] -> (Bool, [Classified])
forall (t :: * -> *) a b c.
Traversable t =>
(a -> b -> (a, c)) -> a -> t b -> (a, t c)
mapAccumL Bool
False ((Bool -> String -> (Bool, Classified))
 -> [String] -> (Bool, [Classified]))
-> (Bool -> String -> (Bool, Classified))
-> [String]
-> (Bool, [Classified])
forall a b. (a -> b) -> a -> b
$ \seenBirdTrack :: Bool
seenBirdTrack line :: String
line ->
        let classification :: Classified
classification = String -> Classified
classify String
line
         in (Bool
seenBirdTrack Bool -> Bool -> Bool
|| Classified -> Bool
isBirdTrack Classified
classification, Classified
classification)

    isBirdTrack :: Classified -> Bool
isBirdTrack (BirdTrack _) = Bool
True
    isBirdTrack _             = Bool
False

    checkErrors :: [Classified] -> Either [Classified] String
checkErrors ls :: [Classified]
ls = case [ String
e | Error e :: String
e <- [Classified]
ls ] of
      []          -> [Classified] -> Either [Classified] String
forall a b. a -> Either a b
Left  [Classified]
ls
      (message :: String
message:_) -> String -> Either [Classified] String
forall a b. b -> Either a b
Right (String
f String -> String -> String
forall a. [a] -> [a] -> [a]
++ ":" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
n String -> String -> String
forall a. [a] -> [a] -> [a]
++ ": " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
message)
        where (f :: String
f, n :: Int
n) = String -> Int -> [Classified] -> (String, Int)
errorPos String
file 1 [Classified]
ls
    errorPos :: String -> Int -> [Classified] -> (String, Int)
errorPos f :: String
f n :: Int
n []              = (String
f, Int
n)
    errorPos f :: String
f n :: Int
n (Error _:_)     = (String
f, Int
n)
    errorPos _ _ (Line n' :: Int
n' f' :: String
f':ls :: [Classified]
ls) = String -> Int -> [Classified] -> (String, Int)
errorPos String
f' Int
n' [Classified]
ls
    errorPos f :: String
f n :: Int
n (_         :ls :: [Classified]
ls) = String -> Int -> [Classified] -> (String, Int)
errorPos String
f  (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+1) [Classified]
ls

-- Here we model a state machine, with each state represented by
-- a local function. We only have four states (well, five,
-- if you count the error state), but the rules
-- to transition between then are not so simple.
-- Would it be simpler to have more states?
--
-- Each state represents the type of line that was last read
-- i.e. are we in a comment section, or a latex-code section,
-- or a bird-code section, etc?
reclassify :: [Classified] -> [Classified]
reclassify :: [Classified] -> [Classified]
reclassify = [Classified] -> [Classified]
blank -- begin in blank state
  where
    latex :: [Classified] -> [Classified]
latex []               = []
    latex (EndCode    :ls :: [Classified]
ls) = String -> Classified
Blank "" Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    latex (BeginCode  :_ ) = [String -> Classified
Error "\\begin{code} in code section"]
    latex (BirdTrack l :: String
l:ls :: [Classified]
ls) = String -> Classified
Ordinary ('>'Char -> String -> String
forall a. a -> [a] -> [a]
:String
l) Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    latex (          l :: Classified
l:ls :: [Classified]
ls) = Classified
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls

    blank :: [Classified] -> [Classified]
blank []               = []
    blank (EndCode    :_ ) = [String -> Classified
Error "\\end{code} without \\begin{code}"]
    blank (BeginCode  :ls :: [Classified]
ls) = String -> Classified
Blank ""    Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    blank (BirdTrack l :: String
l:ls :: [Classified]
ls) = String -> Classified
BirdTrack String
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
bird [Classified]
ls
    blank (Ordinary  l :: String
l:ls :: [Classified]
ls) = String -> Classified
Comment   String
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    blank (          l :: Classified
l:ls :: [Classified]
ls) =           Classified
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
blank [Classified]
ls

    bird :: [Classified] -> [Classified]
bird []              = []
    bird (EndCode   :_ ) = [String -> Classified
Error "\\end{code} without \\begin{code}"]
    bird (BeginCode :ls :: [Classified]
ls) = String -> Classified
Blank "" Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    bird (Blank l :: String
l   :ls :: [Classified]
ls) = String -> Classified
Blank String
l  Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
blank [Classified]
ls
    bird (Ordinary _:_ ) = [String -> Classified
Error "program line before comment line"]
    bird (         l :: Classified
l:ls :: [Classified]
ls) = Classified
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
bird [Classified]
ls

    comment :: [Classified] -> [Classified]
comment []               = []
    comment (EndCode    :_ ) = [String -> Classified
Error "\\end{code} without \\begin{code}"]
    comment (BeginCode  :ls :: [Classified]
ls) = String -> Classified
Blank "" Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    comment (CPP l :: String
l      :ls :: [Classified]
ls) = String -> Classified
CPP String
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (BirdTrack _:_ ) = [String -> Classified
Error "comment line before program line"]
    -- a blank line and another ordinary line following a comment
    -- will be treated as continuing the comment. Otherwise it's
    -- then end of the comment, with a blank line.
    comment (Blank     l :: String
l:ls :: [Classified]
ls@(Ordinary  _:_)) = String -> Classified
Comment String
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (Blank     l :: String
l:ls :: [Classified]
ls) = String -> Classified
Blank String
l   Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
blank [Classified]
ls
    comment (Line n :: Int
n f :: String
f   :ls :: [Classified]
ls) = Int -> String -> Classified
Line Int
n String
f  Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (Ordinary  l :: String
l:ls :: [Classified]
ls) = String -> Classified
Comment String
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (Comment   _: _) = [Classified]
forall a. a
internalError
    comment (Error     _: _) = [Classified]
forall a. a
internalError

-- Re-implementation of 'lines', for better efficiency (but decreased laziness).
-- Also, importantly, accepts non-standard DOS and Mac line ending characters.
inlines :: String -> [String]
inlines :: String -> [String]
inlines xs :: String
xs = String -> (String -> String) -> [String]
lines' String
xs String -> String
forall a. a -> a
id
  where
  lines' :: String -> (String -> String) -> [String]
lines' []             acc :: String -> String
acc = [String -> String
acc []]
  lines' ('\^M':'\n':s :: String
s) acc :: String -> String
acc = String -> String
acc [] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> (String -> String) -> [String]
lines' String
s String -> String
forall a. a -> a
id    -- DOS
  lines' ('\^M':s :: String
s)      acc :: String -> String
acc = String -> String
acc [] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> (String -> String) -> [String]
lines' String
s String -> String
forall a. a -> a
id    -- MacOS
  lines' ('\n':s :: String
s)       acc :: String -> String
acc = String -> String
acc [] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> (String -> String) -> [String]
lines' String
s String -> String
forall a. a -> a
id    -- Unix
  lines' (c :: Char
c:s :: String
s)          acc :: String -> String
acc = String -> (String -> String) -> [String]
lines' String
s (String -> String
acc (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:))

internalError :: a
internalError :: a
internalError = String -> a
forall a. HasCallStack => String -> a
error "unlit: internal error"