Improved sep= detection and added it to auto()

2026-02-19 00:36:38 +00:00 · 2017-11-25 12:24:58 +01:00
parent a4a0dfa2fd
commit 0d14ca01be
1 changed file with 32 additions and 5 deletions
--- a/parsecsv.lib.php
+++ b/parsecsv.lib.php
@@ -543,8 +543,9 @@ class parseCSV {
            $data = &$this->file_data;
        }
-        $this->_guess_delimiter($search_depth, $preferred, $enclosure, $data);
+        if (!$this->_detect_and_remove_sep_row_from_data($data)) {
-
+            $this->_guess_delimiter($search_depth, $preferred, $enclosure, $data);
        }
        // parse data
        if ($parse) {
@@ -1126,18 +1127,44 @@ class parseCSV {
     *
     * @return string|false detected delimiter, or false if none found
     */
-    protected function _get_delimiter_from_sep($data) {
+    protected function _get_delimiter_from_sep_row($data) {
        $sep = false;
        // 32 bytes should be quite enough data for our sniffing, chosen arbitrarily
        $sepPrefix = substr($data, 0, 32);
-
+        if (preg_match('/^sep=(.)\\r?\\n/i', $sepPrefix, $sepMatch)) {
        if (preg_match('/^sep=(.)/i', $sepPrefix, $sepMatch)) {
            // we get separator.
            $sep = $sepMatch[1];
        }
        return $sep;
    }
    /**
     * Detect a leading "sep=X" row, adopt X as the delimiter and strip that row.
     *
     * Detection is delegated to _get_delimiter_from_sep_row(). When a separator
     * is found it is stored in $this->delimiter, and the "sep=" row together
     * with its line ending is removed from $data.
     *
     * @param string $data    file data; passed by reference and modified in
     *                        place when a sep= row is found
     *
     * @return bool TRUE if sep= line was found at the very beginning of the file
     */
    protected function _detect_and_remove_sep_row_from_data(&$data) {
        $sep = $this->_get_delimiter_from_sep_row($data);
        if ($sep === false) {
            return false;
        }

        $this->delimiter = $sep;

        // "sep=" is 4 bytes; add the delimiter's own byte length instead of
        // assuming it is always a single byte.
        $pos = 4 + strlen($sep);

        // Skip the line ending that terminates the sep row: "\r\n" or "\n"
        // (a lone "\r" is skipped as well).
        if (substr($data, $pos, 1) === "\r") {
            $pos++;
        }
        if (substr($data, $pos, 1) === "\n") {
            $pos++;
        }

        // Remove the sep row and its line ending. Before PHP 8.0 substr()
        // returns false (not '') when $pos equals the string length, i.e. when
        // the data holds nothing but the sep row; the cast keeps $data a string.
        $data = (string) substr($data, $pos);

        return true;
    }
    /**
     * @param int $search_depth Number of rows to analyze
     * @param string $preferred Preferred delimiter characters