Skip to content

Commit

Permalink
Backport PR #4189 Csv Method
Browse files Browse the repository at this point in the history
  • Loading branch information
oleibman committed Oct 15, 2024
1 parent 4e58d8a commit 2a1ece2
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 16 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com)
and this project adheres to [Semantic Versioning](https://semver.org).

## TBD - 2.1.2

### Added

- Method to Test Whether Csv Will Be Affected by Php9 (backport of PR #4189 intended for 3.4.0)

## 2024-09-29 2.1.1

### Fixed
Expand Down
85 changes: 69 additions & 16 deletions src/PhpSpreadsheet/Reader/Csv.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
use Throwable;

class Csv extends BaseReader
{
Expand Down Expand Up @@ -74,7 +75,7 @@ class Csv extends BaseReader
* It is conditionally set
* to null-string for Php9 and above.
*/
private static string $defaultEscapeCharacter = '\\';
private static string $defaultEscapeCharacter = PHP_VERSION_ID < 90000 ? '\\' : '';

/**
* Callback for setting defaults in construction.
Expand Down Expand Up @@ -286,6 +287,12 @@ private function openFileOrMemory(string $filename): void
if (!$fhandle) {
throw new ReaderException($filename . ' is an Invalid Spreadsheet file.');
}
if ($this->inputEncoding === 'UTF-8') {
$encoding = self::guessEncodingBom($filename);
if ($encoding !== '') {
$this->inputEncoding = $encoding;
}
}
if ($this->inputEncoding === self::GUESS_ENCODING) {
$this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
}
Expand Down Expand Up @@ -313,7 +320,7 @@ public function setTestAutoDetect(bool $value): self
private function setAutoDetect(?string $value): ?string
{
$retVal = null;
if ($value !== null && $this->testAutodetect) {
if ($value !== null && $this->testAutodetect && PHP_VERSION_ID < 90000) {
$retVal2 = @ini_set('auto_detect_line_endings', $value);
if (is_string($retVal2)) {
$retVal = $retVal2;
Expand Down Expand Up @@ -362,6 +369,20 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
// Deprecated in Php8.1
$iniset = $this->setAutoDetect('1');

try {
$this->loadStringOrFile2($filename, $spreadsheet, $dataUri);
$this->setAutoDetect($iniset);
} catch (Throwable $e) {
$this->setAutoDetect($iniset);

throw $e;
}

return $spreadsheet;
}

private function loadStringOrFile2(string $filename, Spreadsheet $spreadsheet, bool $dataUri): void
{
// Open file
if ($dataUri) {
$this->openDataUri($filename);
Expand Down Expand Up @@ -433,11 +454,6 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo

// Close file
fclose($fileHandle);

$this->setAutoDetect($iniset);

// Return
return $spreadsheet;
}

/**
Expand Down Expand Up @@ -545,6 +561,10 @@ public function getContiguous(): bool
*/
public function setEscapeCharacter(string $escapeCharacter): self
{
if (PHP_VERSION_ID >= 90000 && $escapeCharacter !== '') {
throw new ReaderException('Escape character must be null string for Php9+');
}

$this->escapeCharacter = $escapeCharacter;

return $this;
Expand Down Expand Up @@ -621,17 +641,15 @@ private static function guessEncodingTestBom(string &$encoding, string $first4,
}
}

private static function guessEncodingBom(string $filename): string
public static function guessEncodingBom(string $filename, ?string $convertString = null): string
{
$encoding = '';
$first4 = file_get_contents($filename, false, null, 0, 4);
if ($first4 !== false) {
self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
}
$first4 = $convertString ?? (string) file_get_contents($filename, false, null, 0, 4);
self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');

return $encoding;
}
Expand Down Expand Up @@ -688,4 +706,39 @@ private static function getCsv(

return fgetcsv($stream, $length, $separator, $enclosure, $escape);
}

/**
 * Report whether a Csv file yields different cell data under the
 * settings that apply for Php9 than under the caller's current settings.
 *
 * The file is loaded twice with the same encoding, delimiter and enclosure:
 * first with the supplied escape character and with the auto-detect test
 * flag on, then with a null-string escape character and the flag off.
 * The active sheet of each load is dumped via toArray(null, false, false)
 * and the two dumps are compared strictly.
 *
 * @param string $filename file to load
 * @param string $inputEncoding encoding passed to setInputEncoding for both loads
 * @param null|string $delimiter delimiter passed to setDelimiter for both loads
 * @param string $enclosure enclosure passed to setEnclosure for both loads
 * @param string $escapeCharacter escape character used for the first load only
 *
 * @return bool true when the two loads produce non-identical arrays
 */
public static function affectedByPhp9(
    string $filename,
    string $inputEncoding = 'UTF-8',
    ?string $delimiter = null,
    string $enclosure = '"',
    string $escapeCharacter = '\\'
): bool {
    if (PHP_VERSION_ID < 70400 || PHP_VERSION_ID >= 90000) {
        throw new ReaderException('Function valid only for Php7.4 or Php8'); // @codeCoverageIgnore
    }

    // Load the file with the given auto-detect flag and escape character,
    // returning the active sheet's cells and releasing the workbook afterwards.
    $readCells = static function (bool $testAutoDetect, string $escape) use ($filename, $inputEncoding, $delimiter, $enclosure): array {
        $reader = new self();
        $reader->setInputEncoding($inputEncoding)
            ->setTestAutoDetect($testAutoDetect)
            ->setEscapeCharacter($escape)
            ->setDelimiter($delimiter)
            ->setEnclosure($enclosure);
        $workbook = $reader->load($filename);
        $cells = $workbook->getActiveSheet()->toArray(null, false, false);
        $workbook->disconnectWorksheets();

        return $cells;
    };

    // First pass: caller's escape character, auto-detect test enabled.
    $currentCells = $readCells(true, $escapeCharacter);
    // Second pass: null-string escape character, auto-detect test disabled.
    $php9Cells = $readCells(false, '');

    return $currentCells !== $php9Cells;
}
}
3 changes: 3 additions & 0 deletions tests/PhpSpreadsheetTests/Reader/Csv/CsvLineEndingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ protected function tearDown(): void
*/
public function testEndings(string $ending): void
{
if ($ending === "\r" && PHP_VERSION_ID >= 90000) {
self::markTestSkipped('Mac line endings not supported for Php9+');
}
$this->tempFile = $filename = File::temporaryFilename();
$data = ['123', '456', '789'];
file_put_contents($filename, implode($ending, $data));
Expand Down
8 changes: 8 additions & 0 deletions tests/PhpSpreadsheetTests/Reader/Csv/CsvTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ public static function providerCanLoad(): array

public function testEscapeCharacters(): void
{
if (PHP_VERSION_ID >= 90000) {
$this->expectException(ReaderException::class);
$this->expectExceptionMessage('Escape character must be null string');
}
$reader = (new Csv())->setEscapeCharacter('"');
$worksheet = $reader->load('tests/data/Reader/CSV/backslash.csv')
->getActiveSheet();
Expand Down Expand Up @@ -230,6 +234,10 @@ public function testReadNonexistentFileName(): void
*/
public function testInferSeparator(string $escape, string $delimiter): void
{
if (PHP_VERSION_ID >= 90000 && $escape !== '') {
$this->expectException(ReaderException::class);
$this->expectExceptionMessage('Escape character must be null string');
}
$reader = new Csv();
$reader->setEscapeCharacter($escape);
$filename = 'tests/data/Reader/CSV/escape.csv';
Expand Down
37 changes: 37 additions & 0 deletions tests/PhpSpreadsheetTests/Reader/Csv/Php9Test.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?php

declare(strict_types=1);

namespace PhpOffice\PhpSpreadsheetTests\Reader\Csv;

use PhpOffice\PhpSpreadsheet\Reader\Csv;
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
use PHPUnit\Framework\TestCase;

/**
 * Checks Csv::affectedByPhp9 against every sample file in the Csv test data directory.
 */
class Php9Test extends TestCase
{
    /**
     * Runs affectedByPhp9 over each file in tests/data/Reader/CSV and
     * asserts that exactly the expected files are reported as affected.
     * For Php9+, the call is expected to throw instead.
     */
    public function testAffectedByPhp9(): void
    {
        if (PHP_VERSION_ID >= 90000) {
            $this->expectException(ReaderException::class);
            $this->expectExceptionMessage('Php7.4 or Php8');
        }

        $fixtureDir = 'tests/data/Reader/CSV';
        $paths = glob("$fixtureDir/*");
        self::assertNotFalse($paths);

        $flagged = [];
        foreach ($paths as $path) {
            $name = basename($path);
            // Files named with 'utf' but without 'bom' are read with encoding 'guess'.
            $needsGuess = str_contains($name, 'utf') && !str_contains($name, 'bom');
            if (Csv::affectedByPhp9($path, $needsGuess ? 'guess' : 'UTF-8')) {
                $flagged[] = $name;
            }
        }

        self::assertSame(['backslash.csv', 'escape.csv', 'linend.mac.csv'], $flagged);
    }
}
1 change: 1 addition & 0 deletions tests/data/Reader/CSV/linend.mac.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
A,12,3
Expand Down
2 changes: 2 additions & 0 deletions tests/data/Reader/CSV/linend.unix.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
A,1
2,3
2 changes: 2 additions & 0 deletions tests/data/Reader/CSV/linend.win.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
A,1
2,3

0 comments on commit 2a1ece2

Please sign in to comment.