mirror of
https://github.com/parsecsv/parsecsv-for-php.git
synced 2026-02-19 08:36:39 +00:00
Extracted the long delimiter-detection algorithm from 'auto()' into a new function, '_guess_delimiter()'.
Also removed unnecessary parentheses in ternary expressions along the way.
This commit is contained in:
120
parsecsv.lib.php
120
parsecsv.lib.php
@@ -543,59 +543,8 @@ class parseCSV {
|
||||
$data = &$this->file_data;
|
||||
}
|
||||
|
||||
$chars = array();
|
||||
$strlen = strlen($data);
|
||||
$enclosed = false;
|
||||
$n = 1;
|
||||
$to_end = true;
|
||||
$this->_guess_delimiter($search_depth, $preferred, $enclosure, $data);
|
||||
|
||||
// walk specific depth finding possible delimiter characters
|
||||
for ($i = 0; $i < $strlen; $i++) {
|
||||
$ch = $data{$i};
|
||||
$nch = (isset($data{$i + 1})) ? $data{$i + 1} : false;
|
||||
$pch = (isset($data{$i - 1})) ? $data{$i - 1} : false;
|
||||
|
||||
// open and closing quotes
|
||||
if ($ch == $enclosure) {
|
||||
if (!$enclosed || $nch != $enclosure) {
|
||||
$enclosed = ($enclosed) ? false : true;
|
||||
} elseif ($enclosed) {
|
||||
$i++;
|
||||
}
|
||||
|
||||
// end of row
|
||||
} elseif (($ch == "\n" && $pch != "\r" || $ch == "\r") && !$enclosed) {
|
||||
if ($n >= $search_depth) {
|
||||
$strlen = 0;
|
||||
$to_end = false;
|
||||
} else {
|
||||
$n++;
|
||||
}
|
||||
|
||||
// count character
|
||||
} elseif (!$enclosed) {
|
||||
if (!preg_match('/[' . preg_quote($this->auto_non_chars, '/') . ']/i', $ch)) {
|
||||
if (!isset($chars[$ch][$n])) {
|
||||
$chars[$ch][$n] = 1;
|
||||
} else {
|
||||
$chars[$ch][$n]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// filtering
|
||||
$depth = ($to_end) ? $n - 1 : $n;
|
||||
$filtered = array();
|
||||
foreach ($chars as $char => $value) {
|
||||
if ($match = $this->_check_count($char, $value, $depth, $preferred)) {
|
||||
$filtered[$match] = $char;
|
||||
}
|
||||
}
|
||||
|
||||
// capture most probable delimiter
|
||||
ksort($filtered);
|
||||
$this->delimiter = reset($filtered);
|
||||
|
||||
// parse data
|
||||
if ($parse) {
|
||||
@@ -1188,4 +1137,71 @@ class parseCSV {
|
||||
}
|
||||
return $sep;
|
||||
}
|
||||
|
||||
/**
 * Guess the delimiter by counting candidate characters row by row.
 *
 * Walks $data one byte at a time, ignoring everything inside enclosures,
 * and counts - separately for each row - every character that does not
 * match $this->auto_non_chars. Scanning stops after $search_depth rows or
 * at the end of the data. Each candidate is then handed to _check_count();
 * the candidate whose returned key sorts first is stored in
 * $this->delimiter.
 *
 * @param int    $search_depth Number of rows to analyze
 * @param string $preferred    Preferred delimiter characters
 * @param string $enclosure    Enclosure character, e.g. a double quote
 * @param string $data         The file content (taken by reference; not modified here)
 *
 * @return void The result is written to $this->delimiter. It becomes false
 *              when _check_count() accepts no candidate, because reset()
 *              returns false on an empty array.
 */
protected function _guess_delimiter($search_depth, $preferred, $enclosure, &$data) {
    $chars = [];
    $strlen = strlen($data);
    $enclosed = false;
    $n = 1;
    $to_end = true;

    // The pattern is loop-invariant, so build it once instead of per character.
    // NOTE(review): preg_quote() also escapes "-", so a range such as "a-z"
    // inside auto_non_chars would be matched literally - confirm intended.
    $non_chars_pattern = '/[' . preg_quote($this->auto_non_chars, '/') . ']/i';

    // walk specific depth finding possible delimiter characters
    for ($i = 0; $i < $strlen; $i++) {
        $ch = $data[$i];
        $nch = isset($data[$i + 1]) ? $data[$i + 1] : false;
        // Guard on $i > 0 explicitly: since PHP 7.1 a negative string offset
        // counts from the end of the string, so offset -1 would yield the
        // last byte instead of "no previous character".
        $pch = $i > 0 ? $data[$i - 1] : false;

        if ($ch == $enclosure) {
            // open and closing quotes
            if (!$enclosed || $nch != $enclosure) {
                $enclosed = !$enclosed;
            } else {
                // doubled enclosure inside an enclosed value: skip the second one
                $i++;
            }
        } elseif (($ch === "\n" && $pch !== "\r" || $ch === "\r") && !$enclosed) {
            // end of row ("\r\n" is counted once, at the "\r")
            if ($n >= $search_depth) {
                // enough rows sampled; stop before reaching the end of the data
                $to_end = false;
                break;
            }
            $n++;
        } elseif (!$enclosed) {
            // count character
            if (!preg_match($non_chars_pattern, $ch)) {
                if (isset($chars[$ch][$n])) {
                    $chars[$ch][$n]++;
                } else {
                    $chars[$ch][$n] = 1;
                }
            }
        }
    }

    // filtering
    // When the scan ran to the end of the data, row $n was never closed by a
    // line break, so it is left out of the depth given to _check_count().
    $depth = $to_end ? $n - 1 : $n;
    $filtered = [];
    foreach ($chars as $char => $value) {
        if ($match = $this->_check_count($char, $value, $depth, $preferred)) {
            $filtered[$match] = $char;
        }
    }

    // capture most probable delimiter
    ksort($filtered);
    $this->delimiter = reset($filtered);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user