diff --git a/parsecsv.lib.php b/parsecsv.lib.php index 18ed88e..7e29573 100644 --- a/parsecsv.lib.php +++ b/parsecsv.lib.php @@ -841,15 +841,18 @@ class parseCSV { if (strpos($data, "\xef\xbb\xbf") === 0) { // strip off BOM (UTF-8) $data = substr($data, 3); + $this->encoding('UTF-8'); } else if (strpos($data, "\xff\xfe") === 0) { // strip off BOM (UTF-16 little endian) $data = substr($data, 2); + $this->encoding("UCS-2LE"); } else if (strpos($data, "\xfe\xff") === 0) { // strip off BOM (UTF-16 big endian) $data = substr($data, 2); + $this->encoding("UTF-16"); } if ($this->convert_encoding) { diff --git a/tests/methods/parse_test.php b/tests/methods/parse_test.php index 7652571..03ef03a 100644 --- a/tests/methods/parse_test.php +++ b/tests/methods/parse_test.php @@ -40,17 +40,26 @@ class parse_test extends PHPUnit\Framework\TestCase { } public function test_sep_row_auto_detection_UTF8_no_BOM() { - $this->csv->auto(__DIR__ . '/../example_files/UTF-8_sep_row_but_no_BOM.csv'); - $this->assertEquals($this->_get_magazines_data(), $this->csv->data); + $this->_autoparse_magazine_file( + __DIR__ . '/../example_files/UTF-8_sep_row_but_no_BOM.csv'); } public function test_sep_row_auto_detection_UTF8() { - $this->csv->auto(__DIR__ . '/../example_files/UTF-8_with_BOM_and_sep_row.csv'); - $this->assertEquals($this->_get_magazines_data(), $this->csv->data); + $this->_autoparse_magazine_file( + __DIR__ . '/../example_files/UTF-8_with_BOM_and_sep_row.csv'); } public function test_sep_row_auto_detection_UTF16() { - $this->csv->auto(__DIR__ . '/../example_files/UTF-16LE_with_BOM_and_sep_row.csv'); + $this->_autoparse_magazine_file( + __DIR__ . 
'/../example_files/UTF-16LE_with_BOM_and_sep_row.csv'); + } + + protected function _autoparse_magazine_file($file) { + // This file (parse_test.php) is encoded in UTF-8, hence comparison will + // fail unless we do this: + $this->csv->output_encoding = 'UTF-8'; + + $this->csv->auto($file); $this->assertEquals($this->_get_magazines_data(), $this->csv->data); }