@@ -62,10 +62,10 @@ public static function addHtml($element, $html, $fullHTML = false, $preserveWhit
6262        // Preprocess: remove all line ends, decode HTML entity, 
6363        // fix ampersand and angle brackets and add body tag for HTML fragments 
6464        $ html  = str_replace (array ("\n" , "\r" ), '' , $ html );
65-         $ html  = str_replace (array ('< ' , '> ' , '& ' ), array ('_lt_ ' , '_gt_ ' , '_amp_ ' ), $ html );
65+         $ html  = str_replace (array ('< ' , '> ' , '& ' ,  ' " '  ), array ('_lt_ ' , '_gt_ ' , '_amp_ '  ,  ' _quot_ ' ), $ html );
6666        $ html  = html_entity_decode ($ html , ENT_QUOTES , 'UTF-8 ' );
6767        $ html  = str_replace ('& ' , '& ' , $ html );
68-         $ html  = str_replace (array ('_lt_ ' , '_gt_ ' , '_amp_ ' ), array ('< ' , '> ' , '& ' ), $ html );
68+         $ html  = str_replace (array ('_lt_ ' , '_gt_ ' , '_amp_ ' ,  ' _quot_ '  ), array ('< ' , '> ' , '&  '  ,  ' " ; ' ), $ html );
6969
7070        if  (false  === $ fullHTML ) {
7171            $ html  = '<body> '  . $ html  . '</body> ' ;
@@ -96,15 +96,43 @@ protected static function parseInlineStyle($node, $styles = array())
9696            $ attributes  = $ node ->attributes ; // get all the attributes(eg: id, class) 
9797
9898            foreach  ($ attributes  as  $ attribute ) {
99-                 switch  ($ attribute ->name ) {
99+                 $ val  = $ attribute ->value ;
100+                 switch  (strtolower ($ attribute ->name )) {
100101                    case  'style ' :
101102                        $ styles  = self ::parseStyle ($ attribute , $ styles );
102103                        break ;
103104                    case  'align ' :
104-                         $ styles ['alignment ' ] = self ::mapAlign ($ attribute -> value );
105+                         $ styles ['alignment ' ] = self ::mapAlign (trim ( $ val ) );
105106                        break ;
106107                    case  'lang ' :
107-                         $ styles ['lang ' ] = $ attribute ->value ;
108+                         $ styles ['lang ' ] = $ val ;
109+                         break ;
110+                     case  'width ' :
111+                         // tables, cells 
112+                         if  (false  !== strpos ($ val , '% ' )) {
113+                             // e.g. <table width="100%"> or <td width="50%"> 
114+                             $ styles ['width ' ] = intval ($ val ) * 50 ;
115+                             $ styles ['unit ' ] = \PhpOffice \PhpWord \SimpleType \TblWidth::PERCENT ;
116+                         } else  {
117+                             // e.g. <table width="250"> where "250" = 250px (always pixels) 
118+                             $ styles ['width ' ] = Converter::pixelToTwip ($ val );
119+                             $ styles ['unit ' ] = \PhpOffice \PhpWord \SimpleType \TblWidth::TWIP ;
120+                         }
121+                         break ;
122+                     case  'cellspacing ' :
123+                         // tables e.g. <table cellspacing="2">,  where "2" = 2px (always pixels) 
124+                         $ val  = intval ($ val ).'px ' ;
125+                         $ styles ['cellSpacing ' ] = Converter::cssToTwip ($ val );
126+                         break ;
127+                     case  'bgcolor ' :
128+                         // tables, rows, cells e.g. <tr bgColor="#FF0000"> 
129+                         $ styles ['bgColor ' ] = trim ($ val , '#  ' );
130+                         break ;
131+                     case  'valign ' :
132+                         // cells e.g. <td valign="middle"> 
133+                         if  (preg_match ('#(?:top|bottom|middle|baseline)#i ' , $ val , $ matches )) {
134+                             $ styles ['valign ' ] = self ::mapAlignVertical ($ matches [0 ]);
135+                         }
108136                        break ;
109137                }
110138            }
@@ -161,6 +189,7 @@ protected static function parseNode($node, $element, $styles = array(), $data =
161189            'img '        => array ('Image ' ,       $ node ,  $ element ,   $ styles ,    null ,   null ,           null ),
162190            'br '         => array ('LineBreak ' ,   null ,   $ element ,   $ styles ,    null ,   null ,           null ),
163191            'a '          => array ('Link ' ,        $ node ,  $ element ,   $ styles ,    null ,   null ,           null ),
192+             'hr '         => array ('HorizRule ' ,   $ node ,  $ element ,   $ styles ,    null ,   null ,           null ),
164193        );
165194
166195        $ newElement  = null ;
@@ -361,7 +390,11 @@ protected static function parseCell($node, $element, &$styles)
361390        if  (!empty ($ colspan )) {
362391            $ cellStyles ['gridSpan ' ] = $ colspan  - 0 ;
363392        }
364-         $ cell  = $ element ->addCell (null , $ cellStyles );
393+ 
394+         // set cell width to control column widths 
395+         $ width  = isset ($ cellStyles ['width ' ]) ? $ cellStyles ['width ' ] : null ;
396+         unset($ cellStyles ['width ' ]); // would not apply 
397+         $ cell  = $ element ->addCell ($ width , $ cellStyles );
365398
366399        if  (self ::shouldAddTextRun ($ node )) {
367400            return  $ cell ->addTextRun (self ::parseInlineStyle ($ node , $ styles ['paragraph ' ]));
@@ -420,7 +453,32 @@ protected static function parseList($node, $element, &$styles, &$data)
420453        } else  {
421454            $ data ['listdepth ' ] = 0 ;
422455            $ styles ['list ' ] = 'listStyle_ '  . self ::$ listIndex ++;
423-             $ element ->getPhpWord ()->addNumberingStyle ($ styles ['list ' ], self ::getListStyle ($ isOrderedList ));
456+             $ style  = $ element ->getPhpWord ()->addNumberingStyle ($ styles ['list ' ], self ::getListStyle ($ isOrderedList ));
457+ 
458+             // extract attributes start & type e.g. <ol type="A" start="3"> 
459+             $ start  = 0 ;
460+             $ type  = '' ;
461+             foreach  ($ node ->attributes  as  $ attribute ) {
462+                 switch  ($ attribute ->name ) {
463+                     case  'start ' :
464+                         $ start  = (int ) $ attribute ->value ;
465+                         break ;
466+                     case  'type ' :
467+                         $ type  = $ attribute ->value ;
468+                         break ;
469+                 }
470+             }
471+ 
472+             $ levels  = $ style ->getLevels ();
473+             /** @var \PhpOffice\PhpWord\Style\NumberingLevel */ 
474+             $ level  = $ levels [0 ];
475+             if  ($ start  > 0 ) {
476+                 $ level ->setStart ($ start );
477+             }
478+             $ type  = $ type  ? self ::mapListType ($ type ) : null ;
479+             if  ($ type ) {
480+                 $ level ->setFormat ($ type );
481+             }
424482        }
425483        if  ($ node ->parentNode ->nodeName  === 'li ' ) {
426484            return  $ element ->getParent ();
@@ -502,7 +560,8 @@ protected static function parseStyle($attribute, $styles)
502560        foreach  ($ properties  as  $ property ) {
503561            list ($ cKey , $ cValue ) = array_pad (explode (': ' , $ property , 2 ), 2 , null );
504562            $ cValue  = trim ($ cValue );
505-             switch  (trim ($ cKey )) {
563+             $ cKey  = strtolower (trim ($ cKey ));
564+             switch  ($ cKey ) {
506565                case  'text-decoration ' :
507566                    switch  ($ cValue ) {
508567                        case  'underline ' :
@@ -575,11 +634,18 @@ protected static function parseStyle($attribute, $styles)
575634                    }
576635                    $ styles ['italic ' ] = $ tValue ;
577636                    break ;
637+                 case  'margin ' :
638+                     $ cValue  = Converter::cssToTwip ($ cValue );
639+                     $ styles ['spaceBefore ' ] = $ cValue ;
640+                     $ styles ['spaceAfter ' ] = $ cValue ;
641+                     break ;
578642                case  'margin-top ' :
579-                     $ styles ['spaceBefore ' ] = Converter::cssToPoint ($ cValue );
643+                     // BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue) 
644+                     $ styles ['spaceBefore ' ] = Converter::cssToTwip ($ cValue );
580645                    break ;
581646                case  'margin-bottom ' :
582-                     $ styles ['spaceAfter ' ] = Converter::cssToPoint ($ cValue );
647+                     // BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue) 
648+                     $ styles ['spaceAfter ' ] = Converter::cssToTwip ($ cValue );
583649                    break ;
584650                case  'border-color ' :
585651                    self ::mapBorderColor ($ styles , $ cValue );
@@ -603,10 +669,37 @@ protected static function parseStyle($attribute, $styles)
603669                    }
604670                    break ;
605671                case  'border ' :
606-                     if  (preg_match ('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+)\s+([a-z]+)/ ' , $ cValue , $ matches )) {
607-                         $ styles ['borderSize ' ] = Converter::cssToPoint ($ matches [1 ]);
608-                         $ styles ['borderColor ' ] = trim ($ matches [2 ], '# ' );
609-                         $ styles ['borderStyle ' ] = self ::mapBorderStyle ($ matches [3 ]);
672+                 case  'border-top ' :
673+                 case  'border-bottom ' :
674+                 case  'border-right ' :
675+                 case  'border-left ' :
676+                     // must have exact order [width color style], e.g. "1px #0011CC solid" or "2pt green solid" 
677+                     // Word does not accept shortened hex colors e.g. #CCC, only full e.g. #CCCCCC 
678+                     if  (preg_match ('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+|[a-zA-Z]+)\s+([a-z]+)/ ' , $ cValue , $ matches )) {
679+                         if  (false  !== strpos ($ cKey , '- ' )) {
680+                             $ which  = explode ('- ' , $ cKey )[1 ];
681+                             $ which  = ucfirst ($ which ); // e.g. bottom -> Bottom 
682+                         } else  {
683+                             $ which  = '' ;
684+                         }
685+                         // Note - border width normalization: 
686+                         // Width of border in Word is calculated differently than HTML borders, usually showing up too bold. 
687+                         // Smallest 1px (or 1pt) appears in Word like 2-3px/pt in HTML once converted to twips. 
688+                         // Therefore we need to normalize the converted twip value to approximately half of its value. 
689+                         // This may be adjusted, if better ratio or formula found. 
690+                         // BC change: up to ver. 0.17.0 was $size converted to points - Converter::cssToPoint($size) 
691+                         $ size  = Converter::cssToTwip ($ matches [1 ]);
692+                         $ size  = intval ($ size  / 2 );
693+                         // valid variants may be e.g. borderSize, borderTopSize, borderLeftColor, etc .. 
694+                         $ styles ["border {$ which }Size " ] = $ size ; // twips 
695+                         $ styles ["border {$ which }Color " ] = trim ($ matches [2 ], '# ' );
696+                         $ styles ["border {$ which }Style " ] = self ::mapBorderStyle ($ matches [3 ]);
697+                     }
698+                     break ;
699+                 case  'vertical-align ' :
700+                     // https://developer.mozilla.org/en-US/docs/Web/CSS/vertical-align 
701+                     if  (preg_match ('#(?:top|bottom|middle|sub|baseline)#i ' , $ cValue , $ matches )) {
702+                         $ styles ['valign ' ] = self ::mapAlignVertical ($ matches [0 ]);
610703                    }
611704                    break ;
612705            }
@@ -651,14 +744,14 @@ protected static function parseImage($node, $element)
651744                                case  'float ' :
652745                                    if  (trim ($ v ) == 'right ' ) {
653746                                        $ style ['hPos ' ] = \PhpOffice \PhpWord \Style \Image::POS_RIGHT ;
654-                                         $ style ['hPosRelTo ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELTO_PAGE ; 
747+                                         $ style ['hPosRelTo ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELTO_MARGIN ;  // inner section area 
655748                                        $ style ['pos ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELATIVE ;
656749                                        $ style ['wrap ' ] = \PhpOffice \PhpWord \Style \Image::WRAP_TIGHT ;
657750                                        $ style ['overlap ' ] = true ;
658751                                    }
659752                                    if  (trim ($ v ) == 'left ' ) {
660753                                        $ style ['hPos ' ] = \PhpOffice \PhpWord \Style \Image::POS_LEFT ;
661-                                         $ style ['hPosRelTo ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELTO_PAGE ; 
754+                                         $ style ['hPosRelTo ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELTO_MARGIN ;  // inner section area 
662755                                        $ style ['pos ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELATIVE ;
663756                                        $ style ['wrap ' ] = \PhpOffice \PhpWord \Style \Image::WRAP_TIGHT ;
664757                                        $ style ['overlap ' ] = true ;
@@ -773,6 +866,58 @@ protected static function mapAlign($cssAlignment)
773866        }
774867    }
775868
869+     /** 
870+      * Transforms a HTML/CSS alignment into a \PhpOffice\PhpWord\SimpleType\Jc 
871+      * 
872+      * @param string $cssAlignment 
873+      * @return string|null 
874+      */ 
875+     protected  static  function  mapAlignVertical ($ alignment )
876+     {
877+         $ alignment  = strtolower ($ alignment );
878+         switch  ($ alignment ) {
879+             case  'top ' :
880+             case  'baseline ' :
881+             case  'bottom ' :
882+                 return  $ alignment ;
883+             case  'middle ' :
884+                 return  'center ' ;
885+             case  'sub ' :
886+                 return  'bottom ' ;
887+             case  'text-top ' :
888+             case  'baseline ' :
889+                 return  'top ' ;
890+             default :
891+                 // @discuss - which one should apply: 
892+                 // - Word uses default vert. alignment: top 
893+                 // - all browsers use default vert. alignment: middle 
894+                 // Returning empty string means attribute wont be set so use Word default (top). 
895+                 return  '' ;
896+         }
897+     }
898+ 
899+     /** 
900+     * Map list style for ordered list 
901+     * 
902+     * @param string $cssListType 
903+     */ 
904+     protected  static  function  mapListType ($ cssListType )
905+     {
906+         switch  ($ cssListType ) {
907+             case  'a ' :
908+                 return  NumberFormat::LOWER_LETTER ; // a, b, c, .. 
909+             case  'A ' :
910+                 return  NumberFormat::UPPER_LETTER ; // A, B, C, .. 
911+             case  'i ' :
912+                 return  NumberFormat::LOWER_ROMAN ; // i, ii, iii, iv, .. 
913+             case  'I ' :
914+                 return  NumberFormat::UPPER_ROMAN ; // I, II, III, IV, .. 
915+             case  '1 ' :
916+             default :
917+                 return  NumberFormat::DECIMAL ; // 1, 2, 3, .. 
918+         }
919+     }
920+ 
776921    /** 
777922     * Parse line break 
778923     * 
@@ -808,4 +953,38 @@ protected static function parseLink($node, $element, &$styles)
808953
809954        return  $ element ->addLink ($ target , $ node ->textContent , $ styles ['font ' ], $ styles ['paragraph ' ]);
810955    }
956+ 
957+     /** 
958+     * Render horizontal rule 
959+     * Note: Word rule is not the same as HTML's <hr> since it does not support width and thus neither alignment 
960+     * 
961+     * @param \DOMNode $node 
962+     * @param \PhpOffice\PhpWord\Element\AbstractContainer $element 
963+     */ 
964+     protected  static  function  parseHorizRule ($ node , $ element )
965+     {
966+         $ styles  = self ::parseInlineStyle ($ node );
967+ 
968+         // <hr> is implemented as an empty paragraph - extending 100% inside the section 
969+         // Some properties may be controlled, e.g. <hr style="border-bottom: 3px #DDDDDD solid; margin-bottom: 0;"> 
970+ 
971+         $ fontStyle  = $ styles  + ['size '  => 3 ];
972+ 
973+         $ paragraphStyle  = $ styles  + [
974+             'lineHeight '  => 0.25 , // multiply default line height - e.g. 1, 1.5 etc 
975+             'spacing '  => 0 , // twip 
976+             'spaceBefore '  => 120 , // twip, 240/2 (default line height) 
977+             'spaceAfter '  => 120 , // twip 
978+             'borderBottomSize '  => empty ($ styles ['line-height ' ]) ? 1  : $ styles ['line-height ' ],
979+             'borderBottomColor '  => empty ($ styles ['color ' ]) ? '000000 '  : $ styles ['color ' ],
980+             'borderBottomStyle '  => 'single ' , // same as "solid" 
981+         ];
982+ 
983+         $ element ->addText ("" , $ fontStyle , $ paragraphStyle );
984+ 
985+         // Notes: <hr/> cannot be: 
986+         // - table - throws error "cannot be inside textruns", e.g. lists 
987+         // - line - that is a shape, has different behaviour 
988+         // - repeated text, e.g. underline "_", because of unpredictable line wrapping 
989+     }
811990}
0 commit comments