When we find a BOM, we know the input encoding. Let's use it!

Also: the UTF tests no longer fail, because we now specify the output encoding.
2026-02-19 08:36:39 +00:00 · 2017-11-25 13:43:19 +01:00
parent c818fff81a
commit 4d011827f5
2 changed files with 17 additions and 5 deletions
--- a/parsecsv.lib.php
+++ b/parsecsv.lib.php
@@ -841,15 +841,18 @@ class parseCSV {
            if (strpos($data, "\xef\xbb\xbf") === 0) {
                // strip off BOM (UTF-8)
                $data = substr($data, 3);
+                $this->encoding('UTF-8');
            }
            else if (strpos($data, "\xff\xfe") === 0) {
                // strip off BOM (UTF-16 little endian)
                $data = substr($data, 2);
+                $this->encoding("UCS-2LE");
            }

            else if (strpos($data, "\xfe\xff") === 0) {
                // strip off BOM (UTF-16 big endian)
                $data = substr($data, 2);
+                $this->encoding("UTF-16");
            }

            if ($this->convert_encoding) {