@@ -61,13 +61,14 @@ module Text.Pandoc.Readers.Docx
61
61
import Codec.Archive.Zip
62
62
import Control.Monad.Reader
63
63
import Control.Monad.State.Strict
64
+ import Data.Bifunctor (bimap , first )
64
65
import qualified Data.ByteString.Lazy as B
65
66
import Data.Default (Default )
66
- import Data.List (delete , intersect )
67
+ import Data.List (delete , intersect , foldl' )
67
68
import Data.Char (isSpace )
68
69
import qualified Data.Map as M
69
70
import qualified Data.Text as T
70
- import Data.Maybe (isJust , fromMaybe )
71
+ import Data.Maybe (catMaybes , isJust , fromMaybe )
71
72
import Data.Sequence (ViewL (.. ), viewl )
72
73
import qualified Data.Sequence as Seq
73
74
import qualified Data.Set as Set
@@ -113,6 +114,7 @@ data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
113
114
-- restarting
114
115
, docxListState :: M. Map (T. Text , T. Text ) Integer
115
116
, docxPrevPara :: Inlines
117
+ , docxTableCaptions :: [Blocks ]
116
118
}
117
119
118
120
instance Default DState where
@@ -123,6 +125,7 @@ instance Default DState where
123
125
, docxDropCap = mempty
124
126
, docxListState = M. empty
125
127
, docxPrevPara = mempty
128
+ , docxTableCaptions = []
126
129
}
127
130
128
131
data DEnv = DEnv { docxOptions :: ReaderOptions
@@ -491,15 +494,32 @@ singleParaToPlain blks
491
494
singleton $ Plain ils
492
495
singleParaToPlain blks = blks
493
496
494
- cellToBlocks :: PandocMonad m => Docx. Cell -> DocxContext m Blocks
495
- cellToBlocks (Docx. Cell bps) = do
497
+ cellToCell :: PandocMonad m => RowSpan -> Docx. Cell -> DocxContext m Pandoc. Cell
498
+ cellToCell rowSpan (Docx. Cell gridSpan _ bps) = do
496
499
blks <- smushBlocks <$> mapM bodyPartToBlocks bps
497
- return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
500
+ let blks' = singleParaToPlain $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
501
+ return (cell AlignDefault rowSpan (ColSpan (fromIntegral gridSpan)) blks')
502
+
503
+ rowsToRows :: PandocMonad m => [Docx. Row ] -> DocxContext m [Pandoc. Row ]
504
+ rowsToRows rows = do
505
+ let rowspans = (fmap . fmap ) (first RowSpan ) (Docx. rowsToRowspans rows)
506
+ cells <- traverse (traverse (uncurry cellToCell)) rowspans
507
+ return (fmap (Pandoc. Row nullAttr) cells)
508
+
509
+ splitHeaderRows :: Bool -> [Docx. Row ] -> ([Docx. Row ], [Docx. Row ])
510
+ splitHeaderRows hasFirstRowFormatting rs = bimap reverse reverse $ fst
511
+ $ if hasFirstRowFormatting
512
+ then foldl' f ((take 1 rs, [] ), True ) (drop 1 rs)
513
+ else foldl' f (([] , [] ), False ) rs
514
+ where
515
+ f ((headerRows, bodyRows), previousRowWasHeader) r@ (Docx. Row h cs)
516
+ | h == HasTblHeader || (previousRowWasHeader && any isContinuationCell cs)
517
+ = ((r : headerRows, bodyRows), True )
518
+ | otherwise
519
+ = ((headerRows, r : bodyRows), False )
520
+
521
+ isContinuationCell (Docx. Cell _ vm _) = vm == Docx. Continue
498
522
499
- rowToBlocksList :: PandocMonad m => Docx. Row -> DocxContext m [Blocks ]
500
- rowToBlocksList (Docx. Row cells) = do
501
- blksList <- mapM cellToBlocks cells
502
- return $ map singleParaToPlain blksList
503
523
504
524
-- like trimInlines, but also take out linebreaks
505
525
trimSps :: Inlines -> Inlines
@@ -546,6 +566,11 @@ normalizeToClassName = T.map go . fromStyleName
546
566
where go c | isSpace c = ' -'
547
567
| otherwise = c
548
568
569
+ bodyPartToTableCaption :: PandocMonad m => BodyPart -> DocxContext m (Maybe Blocks )
570
+ bodyPartToTableCaption (TblCaption pPr parparts) =
571
+ Just <$> bodyPartToBlocks (Paragraph pPr parparts)
572
+ bodyPartToTableCaption _ = pure Nothing
573
+
549
574
bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
550
575
bodyPartToBlocks (Paragraph pPr parparts)
551
576
| Just True <- pBidi pPr = do
@@ -637,50 +662,43 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
637
662
let pPr' = pPr {pStyle = constructBogusParStyleData " list-paragraph" : pStyle pPr}
638
663
in
639
664
bodyPartToBlocks $ Paragraph pPr' parparts
665
+ bodyPartToBlocks (TblCaption _ _) =
666
+ return $ para mempty -- collected separately
640
667
bodyPartToBlocks (Tbl _ _ _ [] ) =
641
668
return $ para mempty
642
- bodyPartToBlocks (Tbl cap grid look parts@ (r: rs)) = do
643
- let cap' = simpleCaption $ plain $ text cap
644
- (hdr, rows) = case firstRowFormatting look of
645
- True | null rs -> (Nothing , [r])
646
- | otherwise -> (Just r, rs)
647
- False -> (Nothing , r: rs)
648
-
649
- cells <- mapM rowToBlocksList rows
669
+ bodyPartToBlocks (Tbl cap grid look parts) = do
670
+ captions <- gets docxTableCaptions
671
+ fullCaption <- case captions of
672
+ c : cs -> do
673
+ modify (\ s -> s { docxTableCaptions = cs })
674
+ return (Just c)
675
+ [] -> return Nothing
676
+ let shortCaption = if T. null cap then Nothing else Just (toList (text cap))
677
+ cap' = caption shortCaption (fromMaybe mempty fullCaption)
678
+ (hdr, rows) = splitHeaderRows (firstRowFormatting look) parts
650
679
651
680
let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
652
681
rowLength :: Docx. Row -> Int
653
- rowLength (Docx. Row c) = length c
682
+ rowLength (Docx. Row _ c) = sum ( fmap ( \ ( Docx. Cell gridSpan _ _) -> fromIntegral gridSpan) c)
654
683
655
- let toRow = Pandoc. Row nullAttr . map simpleCell
656
- toHeaderRow l = [toRow l | not ( null l)]
684
+ headerCells <- rowsToRows hdr
685
+ bodyCells <- rowsToRows rows
657
686
658
- -- pad cells. New Text.Pandoc.Builder will do that for us,
659
- -- so this is for compatibility while we switch over.
660
- let cells' = map (\ row -> toRow $ take width (row ++ repeat mempty )) cells
661
-
662
- hdrCells <- case hdr of
663
- Just r' -> toHeaderRow <$> rowToBlocksList r'
664
- Nothing -> return []
665
-
666
- -- The two following variables (horizontal column alignment and
667
- -- relative column widths) go to the default at the
668
- -- moment. Width information is in the TblGrid field of the Tbl,
669
- -- so should be possible. Alignment might be more difficult,
670
- -- since there doesn't seem to be a column entity in docx.
687
+ -- Horizontal column alignment goes to the default at the moment. Getting
688
+ -- it might be difficult, since there doesn't seem to be a column entity
689
+ -- in docx.
671
690
let alignments = replicate width AlignDefault
672
691
totalWidth = sum grid
673
692
widths = (\ w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
674
693
675
694
return $ table cap'
676
695
(zip alignments widths)
677
- (TableHead nullAttr hdrCells )
678
- [TableBody nullAttr 0 [] cells' ]
696
+ (TableHead nullAttr headerCells )
697
+ [TableBody nullAttr 0 [] bodyCells ]
679
698
(TableFoot nullAttr [] )
680
699
bodyPartToBlocks (OMathPara e) =
681
700
return $ para $ displayMath (writeTeX e)
682
701
683
-
684
702
-- replace targets with generated anchors.
685
703
rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
686
704
rewriteLink' l@ (Link attr ils (T. uncons -> Just (' #' ,target), title)) = do
@@ -716,6 +734,8 @@ bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
716
734
bodyToOutput (Body bps) = do
717
735
let (metabps, blkbps) = sepBodyParts bps
718
736
meta <- bodyPartsToMeta metabps
737
+ captions <- catMaybes <$> mapM bodyPartToTableCaption blkbps
738
+ modify (\ s -> s { docxTableCaptions = captions })
719
739
blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
720
740
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
721
741
blks'' <- removeOrphanAnchors blks'
0 commit comments