Improved sep= detection and added it to auto()

This commit is contained in:
Christian Bläul
2017-11-25 12:24:58 +01:00
committed by Fonata
parent a4a0dfa2fd
commit 0d14ca01be

View File

@@ -543,8 +543,9 @@ class parseCSV {
$data = &$this->file_data; $data = &$this->file_data;
} }
$this->_guess_delimiter($search_depth, $preferred, $enclosure, $data); if (!$this->_detect_and_remove_sep_row_from_data($data)) {
$this->_guess_delimiter($search_depth, $preferred, $enclosure, $data);
}
// parse data // parse data
if ($parse) { if ($parse) {
@@ -1126,18 +1127,44 @@ class parseCSV {
* *
* @return string|false detected delimiter, or false if none found * @return string|false detected delimiter, or false if none found
*/ */
protected function _get_delimiter_from_sep($data) { protected function _get_delimiter_from_sep_row($data) {
$sep = false; $sep = false;
// 32 bytes should be quite enough data for our sniffing, chosen arbitrarily // 32 bytes should be quite enough data for our sniffing, chosen arbitrarily
$sepPrefix = substr($data, 0, 32); $sepPrefix = substr($data, 0, 32);
if (preg_match('/^sep=(.)\\r?\\n/i', $sepPrefix, $sepMatch)) {
if (preg_match('/^sep=(.)/i', $sepPrefix, $sepMatch)) {
// we get separator. // we get separator.
$sep = $sepMatch[1]; $sep = $sepMatch[1];
} }
return $sep; return $sep;
} }
/**
 * Detects a leading "sep=" row and, if present, strips it from the data.
 *
 * When _get_delimiter_from_sep_row() reports a separator, that separator is
 * stored in $this->delimiter and the "sep=X" prefix is cut off the front of
 * $data, together with one optional "\r" and one optional "\n" following it.
 *
 * @param string $data file data; passed by reference and modified in place
 *                     when a sep= row is found
 *
 * @return bool TRUE if sep= line was found at the very beginning of the file
 */
protected function _detect_and_remove_sep_row_from_data(&$data) {
    $sep = $this->_get_delimiter_from_sep_row($data);
    if ($sep === false) {
        // no sep= row: leave $data and $this->delimiter untouched
        return false;
    }

    $this->delimiter = $sep;

    // 4 is the length of the literal "sep="; the total is likely to be 5,
    // but let's not assume we're always single-byte.
    $pos = 4 + strlen($sep);
    // the next characters should be a line-end
    if (substr($data, $pos, 1) === "\r") {
        $pos++;
    }
    if (substr($data, $pos, 1) === "\n") {
        $pos++;
    }

    // remove delimiter and its line-end (the data param is by-ref!).
    // Older PHP versions return false from substr() when nothing is left
    // after the sep= row; the cast keeps $data a string in that case.
    $data = (string) substr($data, $pos);

    return true;
}
/** /**
* @param int $search_depth Number of rows to analyze * @param int $search_depth Number of rows to analyze
* @param string $preferred Preferred delimiter characters * @param string $preferred Preferred delimiter characters