From e3105d400322aadc9805d8c513a370499a961ff4 Mon Sep 17 00:00:00 2001 From: Fonata Date: Sun, 10 Mar 2019 11:56:32 +0100 Subject: [PATCH 1/3] Renamed protected functions: no end user should be tempted to call them --- src/Csv.php | 22 ++++++++++++---------- src/extensions/DatatypeTrait.php | 2 +- tests/methods/ParseTest.php | 2 +- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index cf9a208..f73d1e9 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -378,11 +378,11 @@ class Csv { if (strlen($input) <= PHP_MAXPATHLEN && is_readable($input)) { $this->file = $input; - $this->data = $this->parse_file(); + $this->data = $this->_parse_file(); } else { $this->file = null; $this->file_data = &$input; - $this->data = $this->parse_string(); + $this->data = $this->_parse_string(); } return $this->data !== false; @@ -523,7 +523,7 @@ class Csv { // parse data if ($parse) { - $this->data = $this->parse_string(); + $this->data = $this->_parse_string(); } return $this->delimiter; @@ -573,13 +573,13 @@ class Csv { /** * Parse File - * Read file to string and call parse_string() + * Read file to string and call _parse_string() * * @param string|null $file Local CSV file * * @return array|bool */ - protected function parse_file($file = null) { + protected function _parse_file($file = null) { if (is_null($file)) { $file = $this->file; } @@ -588,13 +588,15 @@ class Csv { $this->load_data($file); } - return !empty($this->file_data) ? $this->parse_string() : false; + return !empty($this->file_data) ? $this->_parse_string() : false; } /** - * Parse CSV strings to arrays. If you need BOM detection or character - * encoding conversion, please call load_data() first, followed by a call to - * parse_string() with no parameters. + * Internal function to parse CSV strings to arrays. + * + * If you need BOM detection or character encoding conversion, please call + * $csv->load_data($your_data_string) first, followed by a call to + * $csv->parse($csv->file_data). * * To detect field separators, please use auto() instead. * @@ -602,7 +604,7 @@ class Csv { * * @return array|false - 2D array with CSV data, or false on failure */ - protected function parse_string($data = null) { + protected function _parse_string($data = null) { if (empty($data)) { if ($this->_check_data()) { $data = &$this->file_data; diff --git a/src/extensions/DatatypeTrait.php b/src/extensions/DatatypeTrait.php index a939006..5b22935 100644 --- a/src/extensions/DatatypeTrait.php +++ b/src/extensions/DatatypeTrait.php @@ -51,7 +51,7 @@ trait DatatypeTrait { */ public function getDatatypes() { if (empty($this->data)) { - $this->data = $this->parse_string(); + $this->data = $this->_parse_string(); } if (!is_array($this->data)) { throw new \UnexpectedValueException('No data set yet.'); diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index 4aa4bcd..db3b3ae 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -88,7 +88,7 @@ class ParseTest extends TestCase { $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; $expected_data = [86545235689, 34365587654, 13469874576]; - $actual_data = $this->invokeMethod($this->csv, 'parse_string', array($sInput)); + $actual_data = $this->invokeMethod($this->csv, '_parse_string', array($sInput)); $actual_column = array_map('reset', $actual_data); $this->assertEquals($expected_data, $actual_column); $this->assertEquals([ From 637f79c2e65828e1522fa7678218c9eeadd37f78 Mon Sep 17 00:00:00 2001 From: Fonata Date: Sun, 10 Mar 2019 11:59:28 +0100 Subject: [PATCH 2/3] Improved documentation and parameter declaration No functional changes. --- src/Csv.php | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index f73d1e9..0980a93 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -879,11 +879,16 @@ class Csv { } /** - * Load local file or string + * Load local file or string. * - * @param string|null $input local CSV file + * Only use this function if auto() and parse() don't handle your data well. * - * @return true or false + * This function load_data() is able to handle BOMs and encodings. The data + * is stored within the $this->file_data class field. + * + * @param string|null $input local CSV file or CSV data as a string + * + * @return bool True on success */ public function load_data($input = null) { $data = null; @@ -894,6 +899,7 @@ class Csv { } elseif (\strlen($input) <= PHP_MAXPATHLEN && file_exists($input)) { $file = $input; } else { + // It is CSV data as a string. $data = $input; } @@ -1075,12 +1081,12 @@ class Csv { * Enclose values if needed * - only used by unparse() * - * @param string $value Cell value to process - * @param string $delimiter Character to put between cells on the same row + * @param string|null $value Cell value to process + * @param string $delimiter Character to put between cells on the same row * * @return string Processed value */ - protected function _enclose_value($value = null, $delimiter) { + protected function _enclose_value($value, $delimiter) { if ($value !== null && $value != '') { $delimiter_quoted = $delimiter ? preg_quote($delimiter, '/') . "|" From ef5e81f837b459f8ed2d59c2ac77f06e29b8b8db Mon Sep 17 00:00:00 2001 From: Fonata Date: Sun, 10 Mar 2019 12:05:45 +0100 Subject: [PATCH 3/3] Added test for our code example in issue #165 Closes #165 --- tests/methods/ParseTest.php | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index db3b3ae..94ea974 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -129,6 +129,38 @@ class ParseTest extends TestCase { ], $aCity); } + /** + * Tests if we can handle BOMs in string data, in contrast to loading files. + */ + public function testStringWithLeadingBOM() { + $string_with_bom = strtr( + file_get_contents(__DIR__ . '/../example_files/UTF-8_with_BOM_and_sep_row.csv'), + ["sep=;\n" => '']); + + // Is the BOM still there? + self::assertSame(0xEF, ord($string_with_bom)); + + $this->csv->output_encoding = 'UTF-8'; + $this->csv->delimiter = ';'; + self::assertTrue($this->csv->load_data($string_with_bom)); + self::assertTrue($this->csv->parse($this->csv->file_data)); + + // This also tests if ::load_data removed the BOM from the data; + // otherwise the 'title' column would have 3 extra bytes. + $this->assertEquals([ + 'title', + 'isbn', + 'publishedAt', + ], array_keys(reset($this->csv->data))); + + $titles = array_column($this->csv->data, 'title'); + $this->assertEquals([ + 'Красивая кулинария', + 'The Wine Connoisseurs', + 'Weißwein', + ], $titles); + } + public function testWithMultipleNewlines() { $this->csv->auto(__DIR__ . '/../example_files/multiple_empty_lines.csv'); $aElse9 = array_column($this->csv->data, 'else9');