docbook2txt: halfway finish `<informaltable>`
This required a lot of restructuring, which is why I won't put this on another branch. A part of the library which is updated in github but not yet at hackage, is needed to continue. Specifically, the `Monoid` instance in `Text.Layout.Table.Cell.Formatted` is needed to put together multiple colored text in a table. Among other things introduced, there is now our own `Formatted` type which is used to await formatting until things like tables come up. Compared to the earlier `IO ()` approach, this is probably a lot better.
This commit is contained in:
parent
4099bbce8d
commit
51ab2cf393
|
@ -1,4 +1,4 @@
|
|||
{ pkgs, compiler ? "ghc924", ... }:
|
||||
pkgs.writers.writeHaskellBin "docbook2txt" {
|
||||
libraries = with pkgs.haskellPackages; [ tagsoup ansi-terminal split text ];
|
||||
libraries = with pkgs.haskellPackages; [ tagsoup ansi-terminal split table-layout text ];
|
||||
} (builtins.readFile ./docbook2txt.hs)
|
||||
|
|
|
@ -8,16 +8,93 @@
|
|||
-- end up having to write custom conversion logic for every tag to be
|
||||
-- consumed by pandoc anyway. So instead, I am just planning on keeping
|
||||
-- my own module parsing raw xml tags (for now).
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE PatternSynonyms #-}
|
||||
|
||||
import Data.Char (isSpace)
|
||||
import Data.List (find, intersperse)
|
||||
import Data.List.Split (splitOn)
|
||||
import qualified System.Console.ANSI as AN
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.String (IsString, fromString)
|
||||
import qualified System.Console.ANSI.Codes as AN
|
||||
import qualified System.Console.ANSI.Types as AN
|
||||
import System.Environment (getArgs)
|
||||
import qualified Text.HTML.TagSoup as TS
|
||||
import qualified Text.HTML.TagSoup.Tree as TS
|
||||
-- import qualified Text.Layout.Table as T
|
||||
-- import qualified Text.Layout.Table.Cell as T
|
||||
-- import qualified Text.Layout.Table.Cell.Formatted as T
|
||||
|
||||
--
|
||||
-- Datatypes with relevant methods
|
||||
--
|
||||
|
||||
-- TODO: Mark reflowable text, and do a reflow fold before print.
|
||||
|
||||
-- data ShouldReflow a = SRReflow a
|
||||
-- | SRConstant a
|
||||
-- deriving (Show, Eq)
|
||||
|
||||
data Formatted
|
||||
= FSeveral [Formatted]
|
||||
| FWrapped String Formatted String
|
||||
| FPlain String
|
||||
deriving (Eq)
|
||||
|
||||
instance IsString Formatted where
|
||||
fromString = FPlain
|
||||
|
||||
instance Show Formatted where
|
||||
show (FSeveral fs) = concatMap show fs
|
||||
show (FWrapped w1 s w2) = concat [w1, show s, w2]
|
||||
show (FPlain s) = s
|
||||
|
||||
instance Semigroup Formatted where
|
||||
FSeveral x <> FSeveral y = FSeveral $ x ++ y
|
||||
f1 <> FSeveral x = FSeveral $ f1 : x
|
||||
FSeveral x <> f1 = FSeveral $ x ++ [f1]
|
||||
f1 <> f2 = FSeveral [f1, f2]
|
||||
|
||||
instance Monoid Formatted where
|
||||
mempty = FPlain mempty
|
||||
|
||||
realString :: Formatted -> String
|
||||
realString (FSeveral fs) = concatMap realString fs
|
||||
realString (FWrapped w1 s w2) = realString s
|
||||
realString (FPlain s) = s
|
||||
|
||||
realLength :: Formatted -> Int
|
||||
realLength = length . realString
|
||||
|
||||
-- TODO: Revisit when table-layout gets a new release
|
||||
-- https://github.com/muesli4/table-layout/issues/43
|
||||
|
||||
-- toTableFormattedType :: Formatted -> T.Formatted String
|
||||
-- toTableFormattedType f = case f of
|
||||
-- FSeveral fs -> mconcatMap toTableFormattedType fs
|
||||
-- FWrapped w1 (FPlain s) w2 -> T.formatted w1 s w2
|
||||
-- FWrapped _ f1 _ -> toTableFormattedType f1
|
||||
-- FPlain s -> T.plain s
|
||||
|
||||
data PotentiallyColorizedString = PCS
|
||||
{ colorized :: Formatted,
|
||||
nonColorized :: String
|
||||
}
|
||||
deriving (Show, Eq)
|
||||
|
||||
instance Semigroup PotentiallyColorizedString where
|
||||
pcs1 <> pcs2 =
|
||||
PCS
|
||||
{ colorized = colorized pcs1 <> colorized pcs2,
|
||||
nonColorized = nonColorized pcs1 <> nonColorized pcs2
|
||||
}
|
||||
|
||||
instance Monoid PotentiallyColorizedString where
|
||||
mempty =
|
||||
PCS
|
||||
{ colorized = mempty,
|
||||
nonColorized = mempty
|
||||
}
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
|
@ -26,10 +103,32 @@ main = do
|
|||
let colorizedMode = "-C" `elem` args
|
||||
printTags colorizedMode $ map replaceTagColor $ removeParagraphTags $ TS.parseTree stdin
|
||||
|
||||
data PotentiallyColorizedString = PCS
|
||||
{ colorized :: IO (),
|
||||
nonColorized :: String
|
||||
}
|
||||
-- Print a list of PCSs.
|
||||
-- Depending on the first argument, the color can be optionally
|
||||
-- colored.
|
||||
printTags :: Bool -> [PotentiallyColorizedString] -> IO ()
|
||||
printTags False = putStrLn . unwords . map nonColorized
|
||||
printTags True = putStrLn . mconcatMap (show . colorized)
|
||||
|
||||
-- ANSI helpers
|
||||
|
||||
wrapSGR :: AN.SGR -> String -> Formatted
|
||||
wrapSGR sgr str = FWrapped (AN.setSGRCode [sgr]) (fromString str) (AN.setSGRCode [AN.Reset])
|
||||
|
||||
wrapColor :: AN.Color -> String -> Formatted
|
||||
wrapColor = wrapSGR . AN.SetColor AN.Foreground AN.Vivid
|
||||
|
||||
wrapTxt :: String -> String -> String
|
||||
wrapTxt delimiter string = concat [delimiter, string, delimiter]
|
||||
|
||||
wrapTxt' :: String -> Formatted -> Formatted
|
||||
wrapTxt' delimiter string = FSeveral [fromString delimiter, string, fromString delimiter]
|
||||
|
||||
bold :: AN.SGR
|
||||
bold = AN.SetConsoleIntensity AN.BoldIntensity
|
||||
|
||||
mconcatMap :: Monoid b => (a -> b) -> [a] -> b
|
||||
mconcatMap f = mconcat . map f
|
||||
|
||||
-- Remove `</para><para>` tags.
|
||||
-- If there are more in one doc comment, the middle ones
|
||||
|
@ -45,30 +144,6 @@ removeParagraphTags (TS.TagLeaf (TS.TagOpen "para" _) : rest) = removeParagraphT
|
|||
removeParagraphTags (x : y : rest) = x : removeParagraphTags (y : rest)
|
||||
removeParagraphTags x = x
|
||||
|
||||
-- Print a list of PCSs.
|
||||
-- Depending on the first argument, the color can be optionally
|
||||
-- colored.
|
||||
printTags :: Bool -> [PotentiallyColorizedString] -> IO ()
|
||||
printTags False = putStrLn . unwords . map nonColorized
|
||||
printTags True = mapM_ colorized
|
||||
|
||||
-- ANSI helpers
|
||||
|
||||
wrapSGR :: AN.SGR -> String -> IO ()
|
||||
wrapSGR sgr str = do
|
||||
AN.setSGR [sgr]
|
||||
putStr str
|
||||
AN.setSGR [AN.Reset]
|
||||
|
||||
wrapColor :: AN.Color -> String -> IO ()
|
||||
wrapColor c = wrapSGR (AN.SetColor AN.Foreground AN.Vivid c)
|
||||
|
||||
wrapTxt :: String -> String -> String
|
||||
wrapTxt delimiter string = concat [delimiter, string, delimiter]
|
||||
|
||||
bold :: AN.SGR
|
||||
bold = AN.SetConsoleIntensity AN.BoldIntensity
|
||||
|
||||
pattern TextLeaf a = TS.TagLeaf (TS.TagText a)
|
||||
|
||||
-- Replace tags with their PCS string equivalent.
|
||||
|
@ -76,12 +151,12 @@ replaceTagColor :: TS.TagTree String -> PotentiallyColorizedString
|
|||
replaceTagColor tagtree = case tagtree of
|
||||
TS.TagLeaf (TS.TagText s) ->
|
||||
PCS
|
||||
{ colorized = putStr s,
|
||||
{ colorized = fromString s,
|
||||
nonColorized = s
|
||||
}
|
||||
TS.TagBranch "para" _ inner ->
|
||||
PCS
|
||||
{ colorized = mapM_ (colorized . replaceTagColor) inner,
|
||||
{ colorized = mconcatMap (colorized . replaceTagColor) inner,
|
||||
nonColorized = concatMap (nonColorized . replaceTagColor) inner
|
||||
}
|
||||
TS.TagBranch "code" _ [TextLeaf content] ->
|
||||
|
@ -136,12 +211,12 @@ replaceTagColor tagtree = case tagtree of
|
|||
}
|
||||
TS.TagBranch "quote" _ inner ->
|
||||
PCS
|
||||
{ colorized = sequence_ [putStr "\"", mapM_ (colorized . replaceTagColor) inner, putStr "\""],
|
||||
{ colorized = wrapTxt' "\"" $ mconcatMap (colorized . replaceTagColor) inner,
|
||||
nonColorized = wrapTxt "\"" $ concatMap (nonColorized . replaceTagColor) inner
|
||||
}
|
||||
TS.TagBranch "warning" _ inner ->
|
||||
PCS
|
||||
{ colorized = sequence_ [wrapColor AN.Red "WARNING: ", mapM_ (colorized . replaceTagColor) inner],
|
||||
{ colorized = mconcat [wrapColor AN.Red "WARNING: ", mconcatMap (colorized . replaceTagColor) inner],
|
||||
nonColorized = "WARNING: " ++ concatMap (nonColorized . replaceTagColor) inner
|
||||
}
|
||||
TS.TagBranch "xref" [("linkend", link)] [] ->
|
||||
|
@ -149,15 +224,57 @@ replaceTagColor tagtree = case tagtree of
|
|||
removeOptPrefix ('o' : 'p' : 't' : '-' : rest) = rest
|
||||
removeOptPrefix x = x
|
||||
|
||||
replaceName :: String -> IO ()
|
||||
replaceName x = if x == "_name_" then wrapColor AN.Red "<name>" else wrapSGR bold x
|
||||
replaceName :: String -> Formatted
|
||||
replaceName "_name_" = wrapColor AN.Red "<name>"
|
||||
replaceName x = wrapSGR bold x
|
||||
|
||||
formattedLink :: [IO ()]
|
||||
formattedLink = intersperse (wrapSGR bold ".") $ map replaceName $ splitOn "." $ removeOptPrefix link
|
||||
formattedLink :: Formatted
|
||||
formattedLink = mconcat $ intersperse (wrapSGR bold ".") $ map replaceName $ splitOn "." $ removeOptPrefix link
|
||||
in PCS
|
||||
{ colorized = sequence_ $ [putStr "`"] ++ formattedLink ++ [putStr "`"],
|
||||
{ colorized = wrapTxt' "`" formattedLink,
|
||||
nonColorized = wrapTxt "`" link
|
||||
}
|
||||
-- TS.TagBranch "informaltable" _ [inner] ->
|
||||
-- let
|
||||
-- extractRows :: TS.TagTree String -> Maybe [TS.TagTree String]
|
||||
-- extractRows (TS.TagBranch "tgroup" _ [TS.TagBranch "tbody" _ rows]) = Just rows
|
||||
-- extractRows _ = Nothing
|
||||
|
||||
-- -- TODO: This filters too much
|
||||
-- isWhiteSpace :: TS.TagTree String -> Bool
|
||||
-- isWhiteSpace (TextLeaf s) = False
|
||||
-- isWhiteSpace _ = True
|
||||
|
||||
-- parseRow :: TS.TagTree String -> Maybe [PotentiallyColorizedString]
|
||||
-- parseRow (TS.TagBranch "row" _ entries) = sequence $ map parseEntry $ filter isWhiteSpace entries
|
||||
-- parseRow _ = Nothing
|
||||
|
||||
-- parseEntry :: TS.TagTree String -> Maybe PotentiallyColorizedString
|
||||
-- parseEntry (TS.TagBranch "entry" _ content) = Just $ mconcatMap replaceTagColor content
|
||||
-- parseEntry _ = Nothing
|
||||
|
||||
-- rawRows :: Maybe [[PotentiallyColorizedString]]
|
||||
-- rawRows = do
|
||||
-- rows <- extractRows inner
|
||||
-- sequence $ map parseRow $ filter isWhiteSpace rows
|
||||
|
||||
-- generateColSpec :: [[a]] -> [T.ColSpec]
|
||||
-- generateColSpec rs = flip replicate T.def $ length $ rs !! 0
|
||||
|
||||
-- generateTableConfig :: T.Cell a => [[b]] -> [T.RowGroup a] -> String
|
||||
-- generateTableConfig rs = (++) "\n" . T.tableString (generateColSpec rs) T.unicodeRoundS T.def
|
||||
|
||||
-- table :: T.Cell a => (PotentiallyColorizedString -> a) -> Maybe String
|
||||
-- table f = case rawRows of
|
||||
-- Nothing -> Nothing
|
||||
-- Just rrs -> Just $ generateTableConfig rrs $ map (T.rowG . map f) $ rrs
|
||||
|
||||
-- errorMessage :: String
|
||||
-- errorMessage = "ERROR: Could not parse <informaltable>";
|
||||
-- in PCS {
|
||||
-- colorized = fromMaybe (wrapColor AN.Red errorMessage) (FPlain <$> table (toTableFormattedType . colorized)),
|
||||
-- nonColorized = fromMaybe errorMessage $ table nonColorized
|
||||
-- }
|
||||
TS.TagBranch "citerefentry" _ content ->
|
||||
let tagBranchTagMatches :: String -> TS.TagTree String -> Bool
|
||||
tagBranchTagMatches x (TS.TagBranch tag _ _) = tag == x
|
||||
|
|
Loading…
Reference in New Issue