From 15e028fd93fcb4e9186b901ac7b472aed90ab467 Mon Sep 17 00:00:00 2001 From: oleibman <10341515+oleibman@users.noreply.github.com> Date: Mon, 14 Oct 2024 22:00:52 -0700 Subject: [PATCH] Backport PR #4189 Csv Method --- .github/workflows/main.yml | 10 +-- CHANGELOG.md | 6 ++ src/PhpSpreadsheet/Reader/Csv.php | 85 +++++++++++++++---- src/PhpSpreadsheet/Reader/Html.php | 2 +- .../Reader/Csv/CsvLineEndingTest.php | 3 + .../Reader/Csv/CsvTest.php | 8 ++ .../Reader/Csv/Php9Test.php | 37 ++++++++ tests/data/Reader/CSV/linend.mac.csv | 1 + tests/data/Reader/CSV/linend.unix.csv | 2 + tests/data/Reader/CSV/linend.win.csv | 2 + 10 files changed, 134 insertions(+), 22 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Reader/Csv/Php9Test.php create mode 100644 tests/data/Reader/CSV/linend.mac.csv create mode 100644 tests/data/Reader/CSV/linend.unix.csv create mode 100644 tests/data/Reader/CSV/linend.win.csv diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index df117d45f2..6033449353 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -79,7 +79,7 @@ jobs: - name: Setup PHP, with composer and extensions uses: shivammathur/setup-php@v2 with: - php-version: 8.1 + php-version: 8.3 extensions: ctype, dom, gd, iconv, fileinfo, libxml, mbstring, simplexml, xml, xmlreader, xmlwriter, zip, zlib coverage: none tools: cs2pr @@ -110,7 +110,7 @@ jobs: - name: Setup PHP, with composer and extensions uses: shivammathur/setup-php@v2 with: - php-version: 8.1 + php-version: 8.3 extensions: ctype, dom, gd, iconv, fileinfo, libxml, mbstring, simplexml, xml, xmlreader, xmlwriter, zip, zlib coverage: none tools: cs2pr @@ -141,7 +141,7 @@ jobs: - name: Setup PHP, with composer and extensions uses: shivammathur/setup-php@v2 with: - php-version: 8.1 + php-version: 8.3 extensions: ctype, dom, gd, iconv, fileinfo, libxml, mbstring, simplexml, xml, xmlreader, xmlwriter, zip, zlib coverage: none tools: cs2pr @@ -172,7 +172,7 @@ jobs: - name: Setup PHP, with composer and extensions uses: shivammathur/setup-php@v2 with: - php-version: 8.1 + php-version: 8.3 extensions: ctype, dom, gd, iconv, fileinfo, libxml, mbstring, simplexml, xml, xmlreader, xmlwriter, zip, zlib coverage: none tools: cs2pr @@ -205,7 +205,7 @@ jobs: - name: Setup PHP, with composer and extensions uses: shivammathur/setup-php@v2 with: - php-version: 8.1 + php-version: 8.3 extensions: ctype, dom, gd, iconv, fileinfo, libxml, mbstring, simplexml, xml, xmlreader, xmlwriter, zip, zlib coverage: pcov diff --git a/CHANGELOG.md b/CHANGELOG.md index 170398c36c..618eca7915 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com) and this project adheres to [Semantic Versioning](https://semver.org). +## TBD - 1.29.3 + +### Added + +- Method to Test Whether Csv Will Be Affected by Php9 (backport of PR #4189 intended for 3.4.0) + ## 1.29.2 - 2024-09-29 ### Fixed diff --git a/src/PhpSpreadsheet/Reader/Csv.php b/src/PhpSpreadsheet/Reader/Csv.php index 3feec8e885..b866892757 100644 --- a/src/PhpSpreadsheet/Reader/Csv.php +++ b/src/PhpSpreadsheet/Reader/Csv.php @@ -10,6 +10,7 @@ use PhpOffice\PhpSpreadsheet\Shared\StringHelper; use PhpOffice\PhpSpreadsheet\Spreadsheet; use PhpOffice\PhpSpreadsheet\Style\NumberFormat; +use Throwable; class Csv extends BaseReader { @@ -85,7 +86,7 @@ class Csv extends BaseReader * It is anticipated that it will conditionally be set * to null-string for Php9 and above. */ - private static string $defaultEscapeCharacter = '\\'; + private static string $defaultEscapeCharacter = PHP_VERSION_ID < 90000 ? '\\' : ''; /** * Callback for setting defaults in construction. @@ -295,6 +296,12 @@ private function openFileOrMemory(string $filename): void if (!$fhandle) { throw new Exception($filename . ' is an Invalid Spreadsheet file.'); } + if ($this->inputEncoding === 'UTF-8') { + $encoding = self::guessEncodingBom($filename); + if ($encoding !== '') { + $this->inputEncoding = $encoding; + } + } if ($this->inputEncoding === self::GUESS_ENCODING) { $this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding); } @@ -322,7 +329,7 @@ public function setTestAutoDetect(bool $value): self private function setAutoDetect(?string $value): ?string { $retVal = null; - if ($value !== null && $this->testAutodetect) { + if ($value !== null && $this->testAutodetect && PHP_VERSION_ID < 90000) { $retVal2 = @ini_set('auto_detect_line_endings', $value); if (is_string($retVal2)) { $retVal = $retVal2; @@ -371,6 +378,20 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo // Deprecated in Php8.1 $iniset = $this->setAutoDetect('1'); + try { + $this->loadStringOrFile2($filename, $spreadsheet, $dataUri); + $this->setAutoDetect($iniset); + } catch (Throwable $e) { + $this->setAutoDetect($iniset); + + throw $e; + } + + return $spreadsheet; + } + + private function loadStringOrFile2(string $filename, Spreadsheet $spreadsheet, bool $dataUri): void + { // Open file if ($dataUri) { $this->openDataUri($filename); @@ -428,11 +449,6 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo // Close file fclose($fileHandle); - - $this->setAutoDetect($iniset); - - // Return - return $spreadsheet; } /** @@ -544,6 +560,10 @@ public function getContiguous(): bool */ public function setEscapeCharacter(string $escapeCharacter): self { + if (PHP_VERSION_ID >= 90000 && $escapeCharacter !== '') { + throw new ReaderException('Escape character must be null string for Php9+'); + } + $this->escapeCharacter = $escapeCharacter; return $this; @@ -620,17 +640,15 @@ private static function guessEncodingTestBom(string &$encoding, string $first4, } } - private static function guessEncodingBom(string $filename): string + public static function guessEncodingBom(string $filename, ?string $convertString = null): string { $encoding = ''; - $first4 = file_get_contents($filename, false, null, 0, 4); - if ($first4 !== false) { - self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8'); - self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE'); - self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE'); - self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE'); - self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE'); - } + $first4 = $convertString ?? (string) file_get_contents($filename, false, null, 0, 4); + self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8'); + self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE'); + self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE'); + self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE'); + self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE'); return $encoding; } @@ -679,4 +697,39 @@ private static function getCsv( return fgetcsv($stream, $length, $separator, $enclosure, $escape); } + + public static function affectedByPhp9( + string $filename, + string $inputEncoding = 'UTF-8', + ?string $delimiter = null, + string $enclosure = '"', + string $escapeCharacter = '\\' + ): bool { + if (PHP_VERSION_ID < 70400 || PHP_VERSION_ID >= 90000) { + throw new ReaderException('Function valid only for Php7.4 or Php8'); // @codeCoverageIgnore + } + $reader1 = new self(); + $reader1->setInputEncoding($inputEncoding) + ->setTestAutoDetect(true) + ->setEscapeCharacter($escapeCharacter) + ->setDelimiter($delimiter) + ->setEnclosure($enclosure); + $spreadsheet1 = $reader1->load($filename); + $sheet1 = $spreadsheet1->getActiveSheet(); + $array1 = $sheet1->toArray(null, false, false); + $spreadsheet1->disconnectWorksheets(); + + $reader2 = new self(); + $reader2->setInputEncoding($inputEncoding) + ->setTestAutoDetect(false) + ->setEscapeCharacter('') + ->setDelimiter($delimiter) + ->setEnclosure($enclosure); + $spreadsheet2 = $reader2->load($filename); + $sheet2 = $spreadsheet2->getActiveSheet(); + $array2 = $sheet2->toArray(null, false, false); + $spreadsheet2->disconnectWorksheets(); + + return $array1 !== $array2; + } } diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index 19bf67b200..bfb52401aa 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -168,7 +168,7 @@ private function readBeginning(): string private function readEnding(): string { $meta = stream_get_meta_data($this->fileHandle); - $filename = $meta['uri']; // @phpstan-ignore-line + $filename = $meta['uri']; $size = (int) filesize($filename); if ($size === 0) { diff --git a/tests/PhpSpreadsheetTests/Reader/Csv/CsvLineEndingTest.php b/tests/PhpSpreadsheetTests/Reader/Csv/CsvLineEndingTest.php index 1ac17b981d..2d73cbc37b 100644 --- a/tests/PhpSpreadsheetTests/Reader/Csv/CsvLineEndingTest.php +++ b/tests/PhpSpreadsheetTests/Reader/Csv/CsvLineEndingTest.php @@ -24,6 +24,9 @@ protected function tearDown(): void */ public function testEndings(string $ending): void { + if ($ending === "\r" && PHP_VERSION_ID >= 90000) { + self::markTestSkipped('Mac line endings not supported for Php9+'); + } $this->tempFile = $filename = File::temporaryFilename(); $data = ['123', '456', '789']; file_put_contents($filename, implode($ending, $data)); diff --git a/tests/PhpSpreadsheetTests/Reader/Csv/CsvTest.php b/tests/PhpSpreadsheetTests/Reader/Csv/CsvTest.php index 062f4f7d9d..03b3e08a9b 100644 --- a/tests/PhpSpreadsheetTests/Reader/Csv/CsvTest.php +++ b/tests/PhpSpreadsheetTests/Reader/Csv/CsvTest.php @@ -120,6 +120,10 @@ public static function providerCanLoad(): array public function testEscapeCharacters(): void { + if (PHP_VERSION_ID >= 90000) { + $this->expectException(ReaderException::class); + $this->expectExceptionMessage('Escape character must be null string'); + } $reader = (new Csv())->setEscapeCharacter('"'); $worksheet = $reader->load('tests/data/Reader/CSV/backslash.csv') ->getActiveSheet(); @@ -236,6 +240,10 @@ public function testReadNonexistentFileName(): void */ public function testInferSeparator(string $escape, string $delimiter): void { + if (PHP_VERSION_ID >= 90000 && $escape !== '') { + $this->expectException(ReaderException::class); + $this->expectExceptionMessage('Escape character must be null string'); + } $reader = new Csv(); $reader->setEscapeCharacter($escape); $filename = 'tests/data/Reader/CSV/escape.csv'; diff --git a/tests/PhpSpreadsheetTests/Reader/Csv/Php9Test.php b/tests/PhpSpreadsheetTests/Reader/Csv/Php9Test.php new file mode 100644 index 0000000000..131c366aeb --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Csv/Php9Test.php @@ -0,0 +1,37 @@ += 90000) { + $this->expectException(ReaderException::class); + $this->expectExceptionMessage('Php7.4 or Php8'); + } + $dir = 'tests/data/Reader/CSV'; + $files = glob("$dir/*"); + self::assertNotFalse($files); + $affected = []; + foreach ($files as $file) { + $base = basename($file); + $encoding = 'UTF-8'; + if (str_contains($base, 'utf') && !str_contains($base, 'bom')) { + $encoding = 'guess'; + } + $result = Csv::affectedByPhp9($file, $encoding); + if ($result) { + $affected[] = $base; + } + } + $expected = ['backslash.csv', 'escape.csv', 'linend.mac.csv']; + self::assertSame($expected, $affected); + } +} diff --git a/tests/data/Reader/CSV/linend.mac.csv b/tests/data/Reader/CSV/linend.mac.csv new file mode 100644 index 0000000000..88ee3ed45f --- /dev/null +++ b/tests/data/Reader/CSV/linend.mac.csv @@ -0,0 +1 @@ +A,1 2,3 \ No newline at end of file diff --git a/tests/data/Reader/CSV/linend.unix.csv b/tests/data/Reader/CSV/linend.unix.csv new file mode 100644 index 0000000000..54746af89a --- /dev/null +++ b/tests/data/Reader/CSV/linend.unix.csv @@ -0,0 +1,2 @@ +A,1 +2,3 diff --git a/tests/data/Reader/CSV/linend.win.csv b/tests/data/Reader/CSV/linend.win.csv new file mode 100644 index 0000000000..badb9dadf7 --- /dev/null +++ b/tests/data/Reader/CSV/linend.win.csv @@ -0,0 +1,2 @@ +A,1 +2,3