From ccad5c83600646cfcbb7833a31e923a7d033015e Mon Sep 17 00:00:00 2001 From: Matthew de Marillac Date: Sat, 25 Nov 2017 13:07:24 +0100 Subject: [PATCH] Remove BOM from UTF files. The implementation is similar to the suggestion in #83, and thus closes #83 --- parsecsv.lib.php | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/parsecsv.lib.php b/parsecsv.lib.php index 13725ee..8001442 100644 --- a/parsecsv.lib.php +++ b/parsecsv.lib.php @@ -838,6 +838,20 @@ class parseCSV { $data = ltrim($strip[1]); } + if (strpos($data, "\xef\xbb\xbf") === 0) { + // strip off BOM (UTF-8) + $data = substr($data, 3); + } + else if (strpos($data, "\xff\xfe") === 0) { + // strip off BOM (UTF-16 little endian) + $data = substr($data, 2); + } + + else if (strpos($data, "\xfe\xff") === 0) { + // strip off BOM (UTF-16 big endian) + $data = substr($data, 2); + } + if ($this->convert_encoding) { $data = iconv($this->input_encoding, $this->output_encoding, $data); }