mirror of
https://github.com/parsecsv/parsecsv-for-php.git
synced 2026-02-19 08:36:39 +00:00
When we find a BOM, we know the input encoding. Let's use it!
Also: the UTF tests no longer fail because we now specify the output encoding.
This commit is contained in:
@@ -841,15 +841,18 @@ class parseCSV {
|
|||||||
if (strpos($data, "\xef\xbb\xbf") === 0) {
|
if (strpos($data, "\xef\xbb\xbf") === 0) {
|
||||||
// strip off BOM (UTF-8)
|
// strip off BOM (UTF-8)
|
||||||
$data = substr($data, 3);
|
$data = substr($data, 3);
|
||||||
|
$this->encoding('UTF-8');
|
||||||
}
|
}
|
||||||
else if (strpos($data, "\xff\xfe") === 0) {
|
else if (strpos($data, "\xff\xfe") === 0) {
|
||||||
// strip off BOM (UTF-16 little endian)
|
// strip off BOM (UTF-16 little endian)
|
||||||
$data = substr($data, 2);
|
$data = substr($data, 2);
|
||||||
|
$this->encoding("UCS-2LE");
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (strpos($data, "\xfe\xff") === 0) {
|
else if (strpos($data, "\xfe\xff") === 0) {
|
||||||
// strip off BOM (UTF-16 big endian)
|
// strip off BOM (UTF-16 big endian)
|
||||||
$data = substr($data, 2);
|
$data = substr($data, 2);
|
||||||
|
$this->encoding("UTF-16");
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($this->convert_encoding) {
|
if ($this->convert_encoding) {
|
||||||
|
|||||||
@@ -40,17 +40,26 @@ class parse_test extends PHPUnit\Framework\TestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public function test_sep_row_auto_detection_UTF8_no_BOM() {
|
public function test_sep_row_auto_detection_UTF8_no_BOM() {
|
||||||
$this->csv->auto(__DIR__ . '/../example_files/UTF-8_sep_row_but_no_BOM.csv');
|
$this->_autoparse_magazine_file(
|
||||||
$this->assertEquals($this->_get_magazines_data(), $this->csv->data);
|
__DIR__ . '/../example_files/UTF-8_sep_row_but_no_BOM.csv');
|
||||||
}
|
}
|
||||||
|
|
||||||
public function test_sep_row_auto_detection_UTF8() {
|
public function test_sep_row_auto_detection_UTF8() {
|
||||||
$this->csv->auto(__DIR__ . '/../example_files/UTF-8_with_BOM_and_sep_row.csv');
|
$this->_autoparse_magazine_file(
|
||||||
$this->assertEquals($this->_get_magazines_data(), $this->csv->data);
|
__DIR__ . '/../example_files/UTF-8_with_BOM_and_sep_row.csv');
|
||||||
}
|
}
|
||||||
|
|
||||||
public function test_sep_row_auto_detection_UTF16() {
|
public function test_sep_row_auto_detection_UTF16() {
|
||||||
$this->csv->auto(__DIR__ . '/../example_files/UTF-16LE_with_BOM_and_sep_row.csv');
|
$this->_autoparse_magazine_file(
|
||||||
|
__DIR__ . '/../example_files/UTF-16LE_with_BOM_and_sep_row.csv');
|
||||||
|
}
|
||||||
|
|
||||||
|
protected function _autoparse_magazine_file($file) {
|
||||||
|
// This file (parse_test.php) is encoded in UTF-8, hence comparison will
|
||||||
|
// fail unless we do this:
|
||||||
|
$this->csv->output_encoding = 'UTF-8';
|
||||||
|
|
||||||
|
$this->csv->auto($file);
|
||||||
$this->assertEquals($this->_get_magazines_data(), $this->csv->data);
|
$this->assertEquals($this->_get_magazines_data(), $this->csv->data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user