From 5f42638a82c6cf1a49daffade434ab9231bdd24e Mon Sep 17 00:00:00 2001 From: oleibman <10341515+oleibman@users.noreply.github.com> Date: Fri, 29 Dec 2023 13:21:04 -0800 Subject: [PATCH 1/4] Table Borders Fixes Fix #2402. Fix #2474. Both issues deal with borders around tables when they aren't wanted. There are 3 big issues in the code, and several minor ones. First big issue - Word table styles can have both a `styleId` and a `name`, which are often different from each other, and each of which is used by various Word functions, and what documentation I can find is far from clear on the difference. I have added a `tableStyle` property (for styleId) to Style/Table, and the reader will now preserve both `styleId` and `name`. It will similarly preserve `basedOn`, which is now a private property in Style/Paragraph, but is changed to be a protected property in Style. Second big issue - Word2007 Reader assumes that table style can be specified either by name or by inline declarations, but not both. Guess what? It is now changed to support both. This makes the delta for Reader/Word2007/AbstractPart appear to be much more complicated than it actually is. The change is almost entirely of the form: ``` if (condition) {short_code_block} else {long_code_block} ``` to ``` long_code_block ``` Third big issue. In html, td does not inherit border styles from table. In word, cell border styles are specified in table styles (as insideH/V), so they do, in effect, inherit. This is resolved, as best as I can, by having each td/th without its own style use the table border style. So adding an html border style should produce a consistent result in Html and Docx output. Minor issues: - Html table (not css) attribute border=0 should set borderStyle none on all borders; any other value should set borderStyle single. - PhpWord accepts named colors from html styles. According to the documentation that I can find, Word does not recognize those, but, in practice, it often does. 
Nevertheless, I have added translation to hex (borrowed from PhpSpreadsheet). If nothing else, this will increase interoperability (e.g. RTF doesn't accept named colors, and html 3-hex-digit short forms are now permitted). If a color is not found in the translation table, it will be left unchanged, so there should be no impact. - Writer/Html/Style/Table now accepts colors as 6 hex digits, as well as strings. - The parsing of border css attributes is not accurate. It rejects legitimate values. One example is `2px solid red`, since PhpWord, unlike html, insists on color before style. It rejects `2px #ff0000 solid` because it doesn't accept colors as hex strings. It does not allow the omission of the size and color attributes, but css does. The parsing is rewritten to try to overcome these deficiencies. Note, BTW, that css `border:0` is not acceptable css (size needs a unit and style is omitted); this was mentioned in one of the issues as not being handled correctly, but, since it is invalid, there should be no expectation of its being handled in any particular way. - Style/Border::hasBorder is expanded to test all of Size, Color, and Style, rather than limiting its test to Size. - Properties insideHStyle and insideVStyle are added to Style/Table. Their Color and Size equivalents already existed. - If border is not specified as an Html or css attribute on a table, it is not the same as specifying html border=0 or css border:none. The end result will be whatever the app that reads the result defaults to. The results may not be consistent between, say, Html and Docx. This is already addressed in part by setting default styling for table and td in the html head section to match the Word defaults. However, there may still be differences; the way to (mostly) avoid them is to specify a table style. 
--- phpstan-baseline.neon | 5 - src/PhpWord/Reader/Word2007/AbstractPart.php | 65 ++- src/PhpWord/Reader/Word2007/Styles.php | 6 +- src/PhpWord/Shared/Html.php | 83 ++- src/PhpWord/Shared/HtmlColours.php | 549 ++++++++++++++++++ src/PhpWord/Style/AbstractStyle.php | 15 + src/PhpWord/Style/Border.php | 8 + src/PhpWord/Style/Paragraph.php | 26 +- src/PhpWord/Style/Table.php | 113 ++++ src/PhpWord/Writer/HTML/Element/Table.php | 5 +- src/PhpWord/Writer/HTML/Part/Head.php | 3 + src/PhpWord/Writer/HTML/Style/Table.php | 6 +- src/PhpWord/Writer/Word2007/Part/Styles.php | 18 +- .../Writer/Word2007/Style/MarginBorder.php | 18 +- src/PhpWord/Writer/Word2007/Style/Table.php | 3 + .../Reader/Word2007/StyleTableTest.php | 55 ++ tests/PhpWordTests/Shared/Html2402Test.php | 207 +++++++ tests/PhpWordTests/Shared/HtmlTest.php | 4 +- .../Writer/ODText/Part/ContentTest.php | 10 + .../_files/documents/word.2474.docx | Bin 0 -> 27593 bytes 20 files changed, 1087 insertions(+), 112 deletions(-) create mode 100644 src/PhpWord/Shared/HtmlColours.php create mode 100644 tests/PhpWordTests/Reader/Word2007/StyleTableTest.php create mode 100644 tests/PhpWordTests/Shared/Html2402Test.php create mode 100644 tests/PhpWordTests/_files/documents/word.2474.docx diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index e7918d9174..c5541c9ddd 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -405,11 +405,6 @@ parameters: count: 1 path: src/PhpWord/Shared/Html.php - - - message: "#^Cannot call method setBorderSize\\(\\) on PhpOffice\\\\PhpWord\\\\Style\\\\Table\\|string\\.$#" - count: 1 - path: src/PhpWord/Shared/Html.php - - message: "#^Cannot call method setStyleName\\(\\) on PhpOffice\\\\PhpWord\\\\Style\\\\Table\\|string\\.$#" count: 1 diff --git a/src/PhpWord/Reader/Word2007/AbstractPart.php b/src/PhpWord/Reader/Word2007/AbstractPart.php index 95799387ed..a92e6d5958 100644 --- a/src/PhpWord/Reader/Word2007/AbstractPart.php +++ b/src/PhpWord/Reader/Word2007/AbstractPart.php 
@@ -592,35 +592,46 @@ protected function readTableStyle(XMLReader $xmlReader, DOMElement $domNode) $borders = array_merge($margins, ['insideH', 'insideV']); if ($xmlReader->elementExists('w:tblPr', $domNode)) { + $tblStyleName = ''; if ($xmlReader->elementExists('w:tblPr/w:tblStyle', $domNode)) { - $style = $xmlReader->getAttribute('w:val', $domNode, 'w:tblPr/w:tblStyle'); - } else { - $styleNode = $xmlReader->getElement('w:tblPr', $domNode); - $styleDefs = []; - foreach ($margins as $side) { - $ucfSide = ucfirst($side); - $styleDefs["cellMargin$ucfSide"] = [self::READ_VALUE, "w:tblCellMar/w:$side", 'w:w']; - } - foreach ($borders as $side) { - $ucfSide = ucfirst($side); - $styleDefs["border{$ucfSide}Size"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:sz']; - $styleDefs["border{$ucfSide}Color"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:color']; - $styleDefs["border{$ucfSide}Style"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:val']; - } - $styleDefs['layout'] = [self::READ_VALUE, 'w:tblLayout', 'w:type']; - $styleDefs['bidiVisual'] = [self::READ_TRUE, 'w:bidiVisual']; - $styleDefs['cellSpacing'] = [self::READ_VALUE, 'w:tblCellSpacing', 'w:w']; - $style = $this->readStyleDefs($xmlReader, $styleNode, $styleDefs); - - $tablePositionNode = $xmlReader->getElement('w:tblpPr', $styleNode); - if ($tablePositionNode !== null) { - $style['position'] = $this->readTablePosition($xmlReader, $tablePositionNode); - } + $tblStyleName = $xmlReader->getAttribute('w:val', $domNode, 'w:tblPr/w:tblStyle'); + } + $styleNode = $xmlReader->getElement('w:tblPr', $domNode); + $styleDefs = []; - $indentNode = $xmlReader->getElement('w:tblInd', $styleNode); - if ($indentNode !== null) { - $style['indent'] = $this->readTableIndent($xmlReader, $indentNode); - } + foreach ($margins as $side) { + $ucfSide = ucfirst($side); + $styleDefs["cellMargin$ucfSide"] = [self::READ_VALUE, "w:tblCellMar/w:$side", 'w:w']; + } + foreach ($borders as $side) { + $ucfSide = ucfirst($side); + 
$styleDefs["border{$ucfSide}Size"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:sz']; + $styleDefs["border{$ucfSide}Color"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:color']; + $styleDefs["border{$ucfSide}Style"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:val']; + } + $styleDefs['layout'] = [self::READ_VALUE, 'w:tblLayout', 'w:type']; + $styleDefs['bidiVisual'] = [self::READ_TRUE, 'w:bidiVisual']; + $styleDefs['cellSpacing'] = [self::READ_VALUE, 'w:tblCellSpacing', 'w:w']; + $style = $this->readStyleDefs($xmlReader, $styleNode, $styleDefs); + + $tablePositionNode = $xmlReader->getElement('w:tblpPr', $styleNode); + if ($tablePositionNode !== null) { + $style['position'] = $this->readTablePosition($xmlReader, $tablePositionNode); + } + + $indentNode = $xmlReader->getElement('w:tblInd', $styleNode); + if ($indentNode !== null) { + $style['indent'] = $this->readTableIndent($xmlReader, $indentNode); + } + if ($xmlReader->elementExists('w:basedOn', $domNode)) { + $style['basedOn'] = $xmlReader->getAttribute('w:val', $domNode, 'w:basedOn'); + } + if ($tblStyleName !== '') { + $style['tblStyle'] = $tblStyleName; + } + // this may be unneeded + if ($xmlReader->elementExists('w:name', $domNode)) { + $style['styleName'] = $xmlReader->getAttribute('w:val', $domNode, 'w:name'); } } diff --git a/src/PhpWord/Reader/Word2007/Styles.php b/src/PhpWord/Reader/Word2007/Styles.php index 4566398ad2..a6fd23dd43 100644 --- a/src/PhpWord/Reader/Word2007/Styles.php +++ b/src/PhpWord/Reader/Word2007/Styles.php @@ -63,8 +63,9 @@ public function read(PhpWord $phpWord): void foreach ($nodes as $node) { $type = $xmlReader->getAttribute('w:type', $node); $name = $xmlReader->getAttribute('w:val', $node, 'w:name'); + $styleId = $xmlReader->getAttribute('w:styleId', $node); if (null === $name) { - $name = $xmlReader->getAttribute('w:styleId', $node); + $name = $styleId; } $headingMatches = []; preg_match('/Heading\s*(\d)/i', $name, $headingMatches); @@ -96,7 +97,8 @@ public function 
read(PhpWord $phpWord): void case 'table': $tStyle = $this->readTableStyle($xmlReader, $node); if (!empty($tStyle)) { - $phpWord->addTableStyle($name, $tStyle); + $newTable = $phpWord->addTableStyle($styleId, $tStyle); + $newTable->setStyleName($name); } break; diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index 0a9b23979c..d58871e4a2 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -26,6 +26,7 @@ use PhpOffice\PhpWord\Element\Row; use PhpOffice\PhpWord\Element\Table; use PhpOffice\PhpWord\Settings; +use PhpOffice\PhpWord\SimpleType\Border; use PhpOffice\PhpWord\SimpleType\Jc; use PhpOffice\PhpWord\SimpleType\NumberFormat; use PhpOffice\PhpWord\Style\Paragraph; @@ -37,6 +38,8 @@ */ class Html { + private const SPECIAL_BORDER_WIDTHS = ['thin' => '0.5pt', 'thick' => '3.5pt', 'medium' => '2.0pt']; + protected static $listIndex = 0; protected static $xpath; @@ -142,7 +145,7 @@ protected static function parseInlineStyle($node, $styles = []) break; case 'bgcolor': // tables, rows, cells e.g.
header a | +header b | +header c | +
---|---|---|
1 | 2 | |
This is bold text | 6 |
header a | +header b | +header c | +
---|---|---|
1 | 2 | |
This is bold text | 6 |