docbook2txt: halfway finish `<informaltable>`

This required a lot of restructuring, which is why I won't
put this on another branch.

A part of the library which is updated in github but not yet at hackage,
is needed to continue. Specifically, the `Monoid` instance in
`Text.Layout.Table.Cell.Formatted` is needed to put together multiple
colored text in a table.

Among other things introduced, there is now our own `Formatted` type
which is used to await formatting until things like tables come up.
Compared to the earlier `IO ()` approach, this is probably a lot better.
This commit is contained in:
Oystein Kristoffer Tveit 2022-11-30 20:21:20 +01:00
parent 4099bbce8d
commit 51ab2cf393
Signed by: oysteikt
GPG Key ID: 9F2F7D8250F35146
2 changed files with 157 additions and 40 deletions

View File

@ -1,4 +1,4 @@
{ pkgs, compiler ? "ghc924", ... }: { pkgs, compiler ? "ghc924", ... }:
pkgs.writers.writeHaskellBin "docbook2txt" { pkgs.writers.writeHaskellBin "docbook2txt" {
libraries = with pkgs.haskellPackages; [ tagsoup ansi-terminal split text ]; libraries = with pkgs.haskellPackages; [ tagsoup ansi-terminal split table-layout text ];
} (builtins.readFile ./docbook2txt.hs) } (builtins.readFile ./docbook2txt.hs)

View File

@ -8,16 +8,93 @@
-- end up having to write custom conversion logic for every tag to be -- end up having to write custom conversion logic for every tag to be
-- consumed by pandoc anyway. So instead, I am just planning on keeping -- consumed by pandoc anyway. So instead, I am just planning on keeping
-- my own module parsing raw xml tags (for now). -- my own module parsing raw xml tags (for now).
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE PatternSynonyms #-}
import Data.Char (isSpace)
import Data.List (find, intersperse) import Data.List (find, intersperse)
import Data.List.Split (splitOn) import Data.List.Split (splitOn)
import qualified System.Console.ANSI as AN import Data.Maybe (fromMaybe)
import Data.String (IsString, fromString)
import qualified System.Console.ANSI.Codes as AN
import qualified System.Console.ANSI.Types as AN import qualified System.Console.ANSI.Types as AN
import System.Environment (getArgs) import System.Environment (getArgs)
import qualified Text.HTML.TagSoup as TS import qualified Text.HTML.TagSoup as TS
import qualified Text.HTML.TagSoup.Tree as TS import qualified Text.HTML.TagSoup.Tree as TS
-- import qualified Text.Layout.Table as T
-- import qualified Text.Layout.Table.Cell as T
-- import qualified Text.Layout.Table.Cell.Formatted as T
--
-- Datatypes with relevant methods
--
-- TODO: Mark reflowable text, and do a reflow fold before print.
-- data ShouldReflow a = SRReflow a
-- | SRConstant a
-- deriving (Show, Eq)
data Formatted
= FSeveral [Formatted]
| FWrapped String Formatted String
| FPlain String
deriving (Eq)
instance IsString Formatted where
fromString = FPlain
instance Show Formatted where
show (FSeveral fs) = concatMap show fs
show (FWrapped w1 s w2) = concat [w1, show s, w2]
show (FPlain s) = s
instance Semigroup Formatted where
FSeveral x <> FSeveral y = FSeveral $ x ++ y
f1 <> FSeveral x = FSeveral $ f1 : x
FSeveral x <> f1 = FSeveral $ x ++ [f1]
f1 <> f2 = FSeveral [f1, f2]
instance Monoid Formatted where
mempty = FPlain mempty
realString :: Formatted -> String
realString (FSeveral fs) = concatMap realString fs
realString (FWrapped w1 s w2) = realString s
realString (FPlain s) = s
realLength :: Formatted -> Int
realLength = length . realString
-- TODO: Revisit when table-layout gets a new release
-- https://github.com/muesli4/table-layout/issues/43
-- toTableFormattedType :: Formatted -> T.Formatted String
-- toTableFormattedType f = case f of
-- FSeveral fs -> mconcatMap toTableFormattedType fs
-- FWrapped w1 (FPlain s) w2 -> T.formatted w1 s w2
-- FWrapped _ f1 _ -> toTableFormattedType f1
-- FPlain s -> T.plain s
data PotentiallyColorizedString = PCS
{ colorized :: Formatted,
nonColorized :: String
}
deriving (Show, Eq)
instance Semigroup PotentiallyColorizedString where
pcs1 <> pcs2 =
PCS
{ colorized = colorized pcs1 <> colorized pcs2,
nonColorized = nonColorized pcs1 <> nonColorized pcs2
}
instance Monoid PotentiallyColorizedString where
mempty =
PCS
{ colorized = mempty,
nonColorized = mempty
}
main :: IO () main :: IO ()
main = do main = do
@ -26,10 +103,32 @@ main = do
let colorizedMode = "-C" `elem` args let colorizedMode = "-C" `elem` args
printTags colorizedMode $ map replaceTagColor $ removeParagraphTags $ TS.parseTree stdin printTags colorizedMode $ map replaceTagColor $ removeParagraphTags $ TS.parseTree stdin
data PotentiallyColorizedString = PCS -- Print a list of PCSs.
{ colorized :: IO (), -- Depending on the first argument, the color can be optionally
nonColorized :: String -- colored.
} printTags :: Bool -> [PotentiallyColorizedString] -> IO ()
printTags False = putStrLn . unwords . map nonColorized
printTags True = putStrLn . mconcatMap (show . colorized)
-- ANSI helpers
wrapSGR :: AN.SGR -> String -> Formatted
wrapSGR sgr str = FWrapped (AN.setSGRCode [sgr]) (fromString str) (AN.setSGRCode [AN.Reset])
wrapColor :: AN.Color -> String -> Formatted
wrapColor = wrapSGR . AN.SetColor AN.Foreground AN.Vivid
wrapTxt :: String -> String -> String
wrapTxt delimiter string = concat [delimiter, string, delimiter]
wrapTxt' :: String -> Formatted -> Formatted
wrapTxt' delimiter string = FSeveral [fromString delimiter, string, fromString delimiter]
bold :: AN.SGR
bold = AN.SetConsoleIntensity AN.BoldIntensity
mconcatMap :: Monoid b => (a -> b) -> [a] -> b
mconcatMap f = mconcat . map f
-- Remove `</para><para>` tags. -- Remove `</para><para>` tags.
-- If there are more in one doc comment, the middle ones -- If there are more in one doc comment, the middle ones
@ -45,30 +144,6 @@ removeParagraphTags (TS.TagLeaf (TS.TagOpen "para" _) : rest) = removeParagraphT
removeParagraphTags (x : y : rest) = x : removeParagraphTags (y : rest) removeParagraphTags (x : y : rest) = x : removeParagraphTags (y : rest)
removeParagraphTags x = x removeParagraphTags x = x
-- Print a list of PCSs.
-- Depending on the first argument, the color can be optionally
-- colored.
printTags :: Bool -> [PotentiallyColorizedString] -> IO ()
printTags False = putStrLn . unwords . map nonColorized
printTags True = mapM_ colorized
-- ANSI helpers
wrapSGR :: AN.SGR -> String -> IO ()
wrapSGR sgr str = do
AN.setSGR [sgr]
putStr str
AN.setSGR [AN.Reset]
wrapColor :: AN.Color -> String -> IO ()
wrapColor c = wrapSGR (AN.SetColor AN.Foreground AN.Vivid c)
wrapTxt :: String -> String -> String
wrapTxt delimiter string = concat [delimiter, string, delimiter]
bold :: AN.SGR
bold = AN.SetConsoleIntensity AN.BoldIntensity
pattern TextLeaf a = TS.TagLeaf (TS.TagText a) pattern TextLeaf a = TS.TagLeaf (TS.TagText a)
-- Replace tags with their PCS string equivalent. -- Replace tags with their PCS string equivalent.
@ -76,12 +151,12 @@ replaceTagColor :: TS.TagTree String -> PotentiallyColorizedString
replaceTagColor tagtree = case tagtree of replaceTagColor tagtree = case tagtree of
TS.TagLeaf (TS.TagText s) -> TS.TagLeaf (TS.TagText s) ->
PCS PCS
{ colorized = putStr s, { colorized = fromString s,
nonColorized = s nonColorized = s
} }
TS.TagBranch "para" _ inner -> TS.TagBranch "para" _ inner ->
PCS PCS
{ colorized = mapM_ (colorized . replaceTagColor) inner, { colorized = mconcatMap (colorized . replaceTagColor) inner,
nonColorized = concatMap (nonColorized . replaceTagColor) inner nonColorized = concatMap (nonColorized . replaceTagColor) inner
} }
TS.TagBranch "code" _ [TextLeaf content] -> TS.TagBranch "code" _ [TextLeaf content] ->
@ -136,12 +211,12 @@ replaceTagColor tagtree = case tagtree of
} }
TS.TagBranch "quote" _ inner -> TS.TagBranch "quote" _ inner ->
PCS PCS
{ colorized = sequence_ [putStr "\"", mapM_ (colorized . replaceTagColor) inner, putStr "\""], { colorized = wrapTxt' "\"" $ mconcatMap (colorized . replaceTagColor) inner,
nonColorized = wrapTxt "\"" $ concatMap (nonColorized . replaceTagColor) inner nonColorized = wrapTxt "\"" $ concatMap (nonColorized . replaceTagColor) inner
} }
TS.TagBranch "warning" _ inner -> TS.TagBranch "warning" _ inner ->
PCS PCS
{ colorized = sequence_ [wrapColor AN.Red "WARNING: ", mapM_ (colorized . replaceTagColor) inner], { colorized = mconcat [wrapColor AN.Red "WARNING: ", mconcatMap (colorized . replaceTagColor) inner],
nonColorized = "WARNING: " ++ concatMap (nonColorized . replaceTagColor) inner nonColorized = "WARNING: " ++ concatMap (nonColorized . replaceTagColor) inner
} }
TS.TagBranch "xref" [("linkend", link)] [] -> TS.TagBranch "xref" [("linkend", link)] [] ->
@ -149,15 +224,57 @@ replaceTagColor tagtree = case tagtree of
removeOptPrefix ('o' : 'p' : 't' : '-' : rest) = rest removeOptPrefix ('o' : 'p' : 't' : '-' : rest) = rest
removeOptPrefix x = x removeOptPrefix x = x
replaceName :: String -> IO () replaceName :: String -> Formatted
replaceName x = if x == "_name_" then wrapColor AN.Red "<name>" else wrapSGR bold x replaceName "_name_" = wrapColor AN.Red "<name>"
replaceName x = wrapSGR bold x
formattedLink :: [IO ()] formattedLink :: Formatted
formattedLink = intersperse (wrapSGR bold ".") $ map replaceName $ splitOn "." $ removeOptPrefix link formattedLink = mconcat $ intersperse (wrapSGR bold ".") $ map replaceName $ splitOn "." $ removeOptPrefix link
in PCS in PCS
{ colorized = sequence_ $ [putStr "`"] ++ formattedLink ++ [putStr "`"], { colorized = wrapTxt' "`" formattedLink,
nonColorized = wrapTxt "`" link nonColorized = wrapTxt "`" link
} }
-- TS.TagBranch "informaltable" _ [inner] ->
-- let
-- extractRows :: TS.TagTree String -> Maybe [TS.TagTree String]
-- extractRows (TS.TagBranch "tgroup" _ [TS.TagBranch "tbody" _ rows]) = Just rows
-- extractRows _ = Nothing
-- -- TODO: This filters too much
-- isWhiteSpace :: TS.TagTree String -> Bool
-- isWhiteSpace (TextLeaf s) = False
-- isWhiteSpace _ = True
-- parseRow :: TS.TagTree String -> Maybe [PotentiallyColorizedString]
-- parseRow (TS.TagBranch "row" _ entries) = sequence $ map parseEntry $ filter isWhiteSpace entries
-- parseRow _ = Nothing
-- parseEntry :: TS.TagTree String -> Maybe PotentiallyColorizedString
-- parseEntry (TS.TagBranch "entry" _ content) = Just $ mconcatMap replaceTagColor content
-- parseEntry _ = Nothing
-- rawRows :: Maybe [[PotentiallyColorizedString]]
-- rawRows = do
-- rows <- extractRows inner
-- sequence $ map parseRow $ filter isWhiteSpace rows
-- generateColSpec :: [[a]] -> [T.ColSpec]
-- generateColSpec rs = flip replicate T.def $ length $ rs !! 0
-- generateTableConfig :: T.Cell a => [[b]] -> [T.RowGroup a] -> String
-- generateTableConfig rs = (++) "\n" . T.tableString (generateColSpec rs) T.unicodeRoundS T.def
-- table :: T.Cell a => (PotentiallyColorizedString -> a) -> Maybe String
-- table f = case rawRows of
-- Nothing -> Nothing
-- Just rrs -> Just $ generateTableConfig rrs $ map (T.rowG . map f) $ rrs
-- errorMessage :: String
-- errorMessage = "ERROR: Could not parse <informaltable>";
-- in PCS {
-- colorized = fromMaybe (wrapColor AN.Red errorMessage) (FPlain <$> table (toTableFormattedType . colorized)),
-- nonColorized = fromMaybe errorMessage $ table nonColorized
-- }
TS.TagBranch "citerefentry" _ content -> TS.TagBranch "citerefentry" _ content ->
let tagBranchTagMatches :: String -> TS.TagTree String -> Bool let tagBranchTagMatches :: String -> TS.TagTree String -> Bool
tagBranchTagMatches x (TS.TagBranch tag _ _) = tag == x tagBranchTagMatches x (TS.TagBranch tag _ _) = tag == x