docbook2txt: halfway finish `<informaltable>`
This required a lot of restructuring, which is why I won't put this on another branch. A part of the library which is updated in github but not yet at hackage, is needed to continue. Specifically, the `Monoid` instance in `Text.Layout.Table.Cell.Formatted` is needed to put together multiple colored text in a table. Among other things introduced, there is now our own `Formatted` type which is used to await formatting until things like tables come up. Compared to the earlier `IO ()` approach, this is probably a lot better.
This commit is contained in:
parent
4099bbce8d
commit
51ab2cf393
|
@ -1,4 +1,4 @@
|
||||||
{ pkgs, compiler ? "ghc924", ... }:
|
{ pkgs, compiler ? "ghc924", ... }:
|
||||||
pkgs.writers.writeHaskellBin "docbook2txt" {
|
pkgs.writers.writeHaskellBin "docbook2txt" {
|
||||||
libraries = with pkgs.haskellPackages; [ tagsoup ansi-terminal split text ];
|
libraries = with pkgs.haskellPackages; [ tagsoup ansi-terminal split table-layout text ];
|
||||||
} (builtins.readFile ./docbook2txt.hs)
|
} (builtins.readFile ./docbook2txt.hs)
|
||||||
|
|
|
@ -8,16 +8,93 @@
|
||||||
-- end up having to write custom conversion logic for every tag to be
|
-- end up having to write custom conversion logic for every tag to be
|
||||||
-- consumed by pandoc anyway. So instead, I am just planning on keeping
|
-- consumed by pandoc anyway. So instead, I am just planning on keeping
|
||||||
-- my own module parsing raw xml tags (for now).
|
-- my own module parsing raw xml tags (for now).
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE PatternSynonyms #-}
|
{-# LANGUAGE PatternSynonyms #-}
|
||||||
|
|
||||||
|
import Data.Char (isSpace)
|
||||||
import Data.List (find, intersperse)
|
import Data.List (find, intersperse)
|
||||||
import Data.List.Split (splitOn)
|
import Data.List.Split (splitOn)
|
||||||
import qualified System.Console.ANSI as AN
|
import Data.Maybe (fromMaybe)
|
||||||
|
import Data.String (IsString, fromString)
|
||||||
|
import qualified System.Console.ANSI.Codes as AN
|
||||||
import qualified System.Console.ANSI.Types as AN
|
import qualified System.Console.ANSI.Types as AN
|
||||||
import System.Environment (getArgs)
|
import System.Environment (getArgs)
|
||||||
import qualified Text.HTML.TagSoup as TS
|
import qualified Text.HTML.TagSoup as TS
|
||||||
import qualified Text.HTML.TagSoup.Tree as TS
|
import qualified Text.HTML.TagSoup.Tree as TS
|
||||||
|
-- import qualified Text.Layout.Table as T
|
||||||
|
-- import qualified Text.Layout.Table.Cell as T
|
||||||
|
-- import qualified Text.Layout.Table.Cell.Formatted as T
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Datatypes with relevant methods
|
||||||
|
--
|
||||||
|
|
||||||
|
-- TODO: Mark reflowable text, and do a reflow fold before print.
|
||||||
|
|
||||||
|
-- data ShouldReflow a = SRReflow a
|
||||||
|
-- | SRConstant a
|
||||||
|
-- deriving (Show, Eq)
|
||||||
|
|
||||||
|
data Formatted
|
||||||
|
= FSeveral [Formatted]
|
||||||
|
| FWrapped String Formatted String
|
||||||
|
| FPlain String
|
||||||
|
deriving (Eq)
|
||||||
|
|
||||||
|
instance IsString Formatted where
|
||||||
|
fromString = FPlain
|
||||||
|
|
||||||
|
instance Show Formatted where
|
||||||
|
show (FSeveral fs) = concatMap show fs
|
||||||
|
show (FWrapped w1 s w2) = concat [w1, show s, w2]
|
||||||
|
show (FPlain s) = s
|
||||||
|
|
||||||
|
instance Semigroup Formatted where
|
||||||
|
FSeveral x <> FSeveral y = FSeveral $ x ++ y
|
||||||
|
f1 <> FSeveral x = FSeveral $ f1 : x
|
||||||
|
FSeveral x <> f1 = FSeveral $ x ++ [f1]
|
||||||
|
f1 <> f2 = FSeveral [f1, f2]
|
||||||
|
|
||||||
|
instance Monoid Formatted where
|
||||||
|
mempty = FPlain mempty
|
||||||
|
|
||||||
|
realString :: Formatted -> String
|
||||||
|
realString (FSeveral fs) = concatMap realString fs
|
||||||
|
realString (FWrapped w1 s w2) = realString s
|
||||||
|
realString (FPlain s) = s
|
||||||
|
|
||||||
|
realLength :: Formatted -> Int
|
||||||
|
realLength = length . realString
|
||||||
|
|
||||||
|
-- TODO: Revisit when table-layout gets a new release
|
||||||
|
-- https://github.com/muesli4/table-layout/issues/43
|
||||||
|
|
||||||
|
-- toTableFormattedType :: Formatted -> T.Formatted String
|
||||||
|
-- toTableFormattedType f = case f of
|
||||||
|
-- FSeveral fs -> mconcatMap toTableFormattedType fs
|
||||||
|
-- FWrapped w1 (FPlain s) w2 -> T.formatted w1 s w2
|
||||||
|
-- FWrapped _ f1 _ -> toTableFormattedType f1
|
||||||
|
-- FPlain s -> T.plain s
|
||||||
|
|
||||||
|
data PotentiallyColorizedString = PCS
|
||||||
|
{ colorized :: Formatted,
|
||||||
|
nonColorized :: String
|
||||||
|
}
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
instance Semigroup PotentiallyColorizedString where
|
||||||
|
pcs1 <> pcs2 =
|
||||||
|
PCS
|
||||||
|
{ colorized = colorized pcs1 <> colorized pcs2,
|
||||||
|
nonColorized = nonColorized pcs1 <> nonColorized pcs2
|
||||||
|
}
|
||||||
|
|
||||||
|
instance Monoid PotentiallyColorizedString where
|
||||||
|
mempty =
|
||||||
|
PCS
|
||||||
|
{ colorized = mempty,
|
||||||
|
nonColorized = mempty
|
||||||
|
}
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = do
|
main = do
|
||||||
|
@ -26,10 +103,32 @@ main = do
|
||||||
let colorizedMode = "-C" `elem` args
|
let colorizedMode = "-C" `elem` args
|
||||||
printTags colorizedMode $ map replaceTagColor $ removeParagraphTags $ TS.parseTree stdin
|
printTags colorizedMode $ map replaceTagColor $ removeParagraphTags $ TS.parseTree stdin
|
||||||
|
|
||||||
data PotentiallyColorizedString = PCS
|
-- Print a list of PCSs.
|
||||||
{ colorized :: IO (),
|
-- Depending on the first argument, the color can be optionally
|
||||||
nonColorized :: String
|
-- colored.
|
||||||
}
|
printTags :: Bool -> [PotentiallyColorizedString] -> IO ()
|
||||||
|
printTags False = putStrLn . unwords . map nonColorized
|
||||||
|
printTags True = putStrLn . mconcatMap (show . colorized)
|
||||||
|
|
||||||
|
-- ANSI helpers
|
||||||
|
|
||||||
|
wrapSGR :: AN.SGR -> String -> Formatted
|
||||||
|
wrapSGR sgr str = FWrapped (AN.setSGRCode [sgr]) (fromString str) (AN.setSGRCode [AN.Reset])
|
||||||
|
|
||||||
|
wrapColor :: AN.Color -> String -> Formatted
|
||||||
|
wrapColor = wrapSGR . AN.SetColor AN.Foreground AN.Vivid
|
||||||
|
|
||||||
|
wrapTxt :: String -> String -> String
|
||||||
|
wrapTxt delimiter string = concat [delimiter, string, delimiter]
|
||||||
|
|
||||||
|
wrapTxt' :: String -> Formatted -> Formatted
|
||||||
|
wrapTxt' delimiter string = FSeveral [fromString delimiter, string, fromString delimiter]
|
||||||
|
|
||||||
|
bold :: AN.SGR
|
||||||
|
bold = AN.SetConsoleIntensity AN.BoldIntensity
|
||||||
|
|
||||||
|
mconcatMap :: Monoid b => (a -> b) -> [a] -> b
|
||||||
|
mconcatMap f = mconcat . map f
|
||||||
|
|
||||||
-- Remove `</para><para>` tags.
|
-- Remove `</para><para>` tags.
|
||||||
-- If there are more in one doc comment, the middle ones
|
-- If there are more in one doc comment, the middle ones
|
||||||
|
@ -45,30 +144,6 @@ removeParagraphTags (TS.TagLeaf (TS.TagOpen "para" _) : rest) = removeParagraphT
|
||||||
removeParagraphTags (x : y : rest) = x : removeParagraphTags (y : rest)
|
removeParagraphTags (x : y : rest) = x : removeParagraphTags (y : rest)
|
||||||
removeParagraphTags x = x
|
removeParagraphTags x = x
|
||||||
|
|
||||||
-- Print a list of PCSs.
|
|
||||||
-- Depending on the first argument, the color can be optionally
|
|
||||||
-- colored.
|
|
||||||
printTags :: Bool -> [PotentiallyColorizedString] -> IO ()
|
|
||||||
printTags False = putStrLn . unwords . map nonColorized
|
|
||||||
printTags True = mapM_ colorized
|
|
||||||
|
|
||||||
-- ANSI helpers
|
|
||||||
|
|
||||||
wrapSGR :: AN.SGR -> String -> IO ()
|
|
||||||
wrapSGR sgr str = do
|
|
||||||
AN.setSGR [sgr]
|
|
||||||
putStr str
|
|
||||||
AN.setSGR [AN.Reset]
|
|
||||||
|
|
||||||
wrapColor :: AN.Color -> String -> IO ()
|
|
||||||
wrapColor c = wrapSGR (AN.SetColor AN.Foreground AN.Vivid c)
|
|
||||||
|
|
||||||
wrapTxt :: String -> String -> String
|
|
||||||
wrapTxt delimiter string = concat [delimiter, string, delimiter]
|
|
||||||
|
|
||||||
bold :: AN.SGR
|
|
||||||
bold = AN.SetConsoleIntensity AN.BoldIntensity
|
|
||||||
|
|
||||||
pattern TextLeaf a = TS.TagLeaf (TS.TagText a)
|
pattern TextLeaf a = TS.TagLeaf (TS.TagText a)
|
||||||
|
|
||||||
-- Replace tags with their PCS string equivalent.
|
-- Replace tags with their PCS string equivalent.
|
||||||
|
@ -76,12 +151,12 @@ replaceTagColor :: TS.TagTree String -> PotentiallyColorizedString
|
||||||
replaceTagColor tagtree = case tagtree of
|
replaceTagColor tagtree = case tagtree of
|
||||||
TS.TagLeaf (TS.TagText s) ->
|
TS.TagLeaf (TS.TagText s) ->
|
||||||
PCS
|
PCS
|
||||||
{ colorized = putStr s,
|
{ colorized = fromString s,
|
||||||
nonColorized = s
|
nonColorized = s
|
||||||
}
|
}
|
||||||
TS.TagBranch "para" _ inner ->
|
TS.TagBranch "para" _ inner ->
|
||||||
PCS
|
PCS
|
||||||
{ colorized = mapM_ (colorized . replaceTagColor) inner,
|
{ colorized = mconcatMap (colorized . replaceTagColor) inner,
|
||||||
nonColorized = concatMap (nonColorized . replaceTagColor) inner
|
nonColorized = concatMap (nonColorized . replaceTagColor) inner
|
||||||
}
|
}
|
||||||
TS.TagBranch "code" _ [TextLeaf content] ->
|
TS.TagBranch "code" _ [TextLeaf content] ->
|
||||||
|
@ -136,12 +211,12 @@ replaceTagColor tagtree = case tagtree of
|
||||||
}
|
}
|
||||||
TS.TagBranch "quote" _ inner ->
|
TS.TagBranch "quote" _ inner ->
|
||||||
PCS
|
PCS
|
||||||
{ colorized = sequence_ [putStr "\"", mapM_ (colorized . replaceTagColor) inner, putStr "\""],
|
{ colorized = wrapTxt' "\"" $ mconcatMap (colorized . replaceTagColor) inner,
|
||||||
nonColorized = wrapTxt "\"" $ concatMap (nonColorized . replaceTagColor) inner
|
nonColorized = wrapTxt "\"" $ concatMap (nonColorized . replaceTagColor) inner
|
||||||
}
|
}
|
||||||
TS.TagBranch "warning" _ inner ->
|
TS.TagBranch "warning" _ inner ->
|
||||||
PCS
|
PCS
|
||||||
{ colorized = sequence_ [wrapColor AN.Red "WARNING: ", mapM_ (colorized . replaceTagColor) inner],
|
{ colorized = mconcat [wrapColor AN.Red "WARNING: ", mconcatMap (colorized . replaceTagColor) inner],
|
||||||
nonColorized = "WARNING: " ++ concatMap (nonColorized . replaceTagColor) inner
|
nonColorized = "WARNING: " ++ concatMap (nonColorized . replaceTagColor) inner
|
||||||
}
|
}
|
||||||
TS.TagBranch "xref" [("linkend", link)] [] ->
|
TS.TagBranch "xref" [("linkend", link)] [] ->
|
||||||
|
@ -149,15 +224,57 @@ replaceTagColor tagtree = case tagtree of
|
||||||
removeOptPrefix ('o' : 'p' : 't' : '-' : rest) = rest
|
removeOptPrefix ('o' : 'p' : 't' : '-' : rest) = rest
|
||||||
removeOptPrefix x = x
|
removeOptPrefix x = x
|
||||||
|
|
||||||
replaceName :: String -> IO ()
|
replaceName :: String -> Formatted
|
||||||
replaceName x = if x == "_name_" then wrapColor AN.Red "<name>" else wrapSGR bold x
|
replaceName "_name_" = wrapColor AN.Red "<name>"
|
||||||
|
replaceName x = wrapSGR bold x
|
||||||
|
|
||||||
formattedLink :: [IO ()]
|
formattedLink :: Formatted
|
||||||
formattedLink = intersperse (wrapSGR bold ".") $ map replaceName $ splitOn "." $ removeOptPrefix link
|
formattedLink = mconcat $ intersperse (wrapSGR bold ".") $ map replaceName $ splitOn "." $ removeOptPrefix link
|
||||||
in PCS
|
in PCS
|
||||||
{ colorized = sequence_ $ [putStr "`"] ++ formattedLink ++ [putStr "`"],
|
{ colorized = wrapTxt' "`" formattedLink,
|
||||||
nonColorized = wrapTxt "`" link
|
nonColorized = wrapTxt "`" link
|
||||||
}
|
}
|
||||||
|
-- TS.TagBranch "informaltable" _ [inner] ->
|
||||||
|
-- let
|
||||||
|
-- extractRows :: TS.TagTree String -> Maybe [TS.TagTree String]
|
||||||
|
-- extractRows (TS.TagBranch "tgroup" _ [TS.TagBranch "tbody" _ rows]) = Just rows
|
||||||
|
-- extractRows _ = Nothing
|
||||||
|
|
||||||
|
-- -- TODO: This filters too much
|
||||||
|
-- isWhiteSpace :: TS.TagTree String -> Bool
|
||||||
|
-- isWhiteSpace (TextLeaf s) = False
|
||||||
|
-- isWhiteSpace _ = True
|
||||||
|
|
||||||
|
-- parseRow :: TS.TagTree String -> Maybe [PotentiallyColorizedString]
|
||||||
|
-- parseRow (TS.TagBranch "row" _ entries) = sequence $ map parseEntry $ filter isWhiteSpace entries
|
||||||
|
-- parseRow _ = Nothing
|
||||||
|
|
||||||
|
-- parseEntry :: TS.TagTree String -> Maybe PotentiallyColorizedString
|
||||||
|
-- parseEntry (TS.TagBranch "entry" _ content) = Just $ mconcatMap replaceTagColor content
|
||||||
|
-- parseEntry _ = Nothing
|
||||||
|
|
||||||
|
-- rawRows :: Maybe [[PotentiallyColorizedString]]
|
||||||
|
-- rawRows = do
|
||||||
|
-- rows <- extractRows inner
|
||||||
|
-- sequence $ map parseRow $ filter isWhiteSpace rows
|
||||||
|
|
||||||
|
-- generateColSpec :: [[a]] -> [T.ColSpec]
|
||||||
|
-- generateColSpec rs = flip replicate T.def $ length $ rs !! 0
|
||||||
|
|
||||||
|
-- generateTableConfig :: T.Cell a => [[b]] -> [T.RowGroup a] -> String
|
||||||
|
-- generateTableConfig rs = (++) "\n" . T.tableString (generateColSpec rs) T.unicodeRoundS T.def
|
||||||
|
|
||||||
|
-- table :: T.Cell a => (PotentiallyColorizedString -> a) -> Maybe String
|
||||||
|
-- table f = case rawRows of
|
||||||
|
-- Nothing -> Nothing
|
||||||
|
-- Just rrs -> Just $ generateTableConfig rrs $ map (T.rowG . map f) $ rrs
|
||||||
|
|
||||||
|
-- errorMessage :: String
|
||||||
|
-- errorMessage = "ERROR: Could not parse <informaltable>";
|
||||||
|
-- in PCS {
|
||||||
|
-- colorized = fromMaybe (wrapColor AN.Red errorMessage) (FPlain <$> table (toTableFormattedType . colorized)),
|
||||||
|
-- nonColorized = fromMaybe errorMessage $ table nonColorized
|
||||||
|
-- }
|
||||||
TS.TagBranch "citerefentry" _ content ->
|
TS.TagBranch "citerefentry" _ content ->
|
||||||
let tagBranchTagMatches :: String -> TS.TagTree String -> Bool
|
let tagBranchTagMatches :: String -> TS.TagTree String -> Bool
|
||||||
tagBranchTagMatches x (TS.TagBranch tag _ _) = tag == x
|
tagBranchTagMatches x (TS.TagBranch tag _ _) = tag == x
|
||||||
|
|
Loading…
Reference in New Issue