Improved sep= detection and added it to auto()

This commit is contained in:
Christian Bläul
2017-11-25 12:24:58 +01:00
committed by Fonata
parent a4a0dfa2fd
commit 0d14ca01be

View File

@@ -543,8 +543,9 @@ class parseCSV {
$data = &$this->file_data;
}
$this->_guess_delimiter($search_depth, $preferred, $enclosure, $data);
if (!$this->_detect_and_remove_sep_row_from_data($data)) {
$this->_guess_delimiter($search_depth, $preferred, $enclosure, $data);
}
// parse data
if ($parse) {
@@ -1126,18 +1127,44 @@ class parseCSV {
*
* @return string|false detected delimiter, or false if none found
*/
protected function _get_delimiter_from_sep_row($data) {
    // A sep row can only sit at the very start of the file, so a short prefix
    // is all that needs sniffing; 32 bytes is an arbitrary but ample amount.
    $sepPrefix = substr($data, 0, 32);

    // Accept "sep=" (any letter case), exactly one delimiter character and
    // then a line-end (LF or CRLF). Requiring the line-end keeps an ordinary
    // first row that merely starts with "sep=" from being mistaken for a sep row.
    if (preg_match('/^sep=(.)\\r?\\n/i', $sepPrefix, $sepMatch)) {
        return $sepMatch[1];
    }

    return false;
}
/**
 * Detects a leading "sep=" row, adopts its delimiter and strips the row.
 *
 * When _get_delimiter_from_sep_row() reports a delimiter, it is stored in
 * $this->delimiter and the sep row, including its line-end, is cut off the
 * front of $data. Otherwise neither $data nor $this->delimiter is touched.
 *
 * @param string $data file data; passed by reference and shortened in place
 *                     when a sep row is found
 *
 * @return bool TRUE if sep= line was found at the very beginning of the file
 */
protected function _detect_and_remove_sep_row_from_data(&$data) {
    $sep = $this->_get_delimiter_from_sep_row($data);
    if ($sep === false) {
        return false;
    }

    $this->delimiter = $sep;

    // "sep=" is 4 bytes; measure the delimiter rather than assuming 1 byte.
    $pos = 4 + strlen($sep);

    // Step over the line-end that follows: an optional CR, then an optional LF.
    if (substr($data, $pos, 1) === "\r") {
        $pos++;
    }
    if (substr($data, $pos, 1) === "\n") {
        $pos++;
    }

    // Drop the sep row and its line-end. On PHP < 7 substr() returns false
    // instead of '' when $pos equals the length of $data (a file holding only
    // the sep row); the cast keeps $data a string in that case.
    $data = (string) substr($data, $pos);

    return true;
}
/**
* @param int $search_depth Number of rows to analyze
* @param string $preferred Preferred delimiter characters