Merge pull request #166 from parsecsv/parse_string_internal

End users should not be tempted to call parse_string.
This commit is contained in:
susgo
2019-05-11 14:19:52 +02:00
committed by GitHub
3 changed files with 58 additions and 18 deletions

View File

@@ -378,11 +378,11 @@ class Csv {
if (strlen($input) <= PHP_MAXPATHLEN && is_readable($input)) {
$this->file = $input;
$this->data = $this->parse_file();
$this->data = $this->_parse_file();
} else {
$this->file = null;
$this->file_data = &$input;
$this->data = $this->parse_string();
$this->data = $this->_parse_string();
}
return $this->data !== false;
@@ -523,7 +523,7 @@ class Csv {
// parse data
if ($parse) {
$this->data = $this->parse_string();
$this->data = $this->_parse_string();
}
return $this->delimiter;
@@ -573,13 +573,13 @@ class Csv {
/**
* Parse File
* Read file to string and call parse_string()
* Read file to string and call _parse_string()
*
* @param string|null $file Local CSV file
*
* @return array|bool
*/
protected function parse_file($file = null) {
protected function _parse_file($file = null) {
if (is_null($file)) {
$file = $this->file;
}
@@ -588,13 +588,15 @@ class Csv {
$this->load_data($file);
}
return !empty($this->file_data) ? $this->parse_string() : false;
return !empty($this->file_data) ? $this->_parse_string() : false;
}
/**
* Parse CSV strings to arrays. If you need BOM detection or character
* encoding conversion, please call load_data() first, followed by a call to
* parse_string() with no parameters.
* Internal function to parse CSV strings to arrays.
*
* If you need BOM detection or character encoding conversion, please call
* $csv->load_data($your_data_string) first, followed by a call to
* $csv->parse($csv->file_data).
*
* To detect field separators, please use auto() instead.
*
@@ -602,7 +604,7 @@ class Csv {
*
* @return array|false - 2D array with CSV data, or false on failure
*/
protected function parse_string($data = null) {
protected function _parse_string($data = null) {
if (empty($data)) {
if ($this->_check_data()) {
$data = &$this->file_data;
@@ -877,11 +879,16 @@ class Csv {
}
/**
* Load local file or string
* Load local file or string.
*
* @param string|null $input local CSV file
* Only use this function if auto() and parse() don't handle your data well.
*
* @return true or false
* This function load_data() is able to handle BOMs and encodings. The data
* is stored within the $this->file_data class field.
*
* @param string|null $input local CSV file or CSV data as a string
*
* @return bool True on success
*/
public function load_data($input = null) {
$data = null;
@@ -892,6 +899,7 @@ class Csv {
} elseif (\strlen($input) <= PHP_MAXPATHLEN && file_exists($input)) {
$file = $input;
} else {
// It is CSV data as a string.
$data = $input;
}
@@ -1073,12 +1081,12 @@ class Csv {
* Enclose values if needed
* - only used by unparse()
*
* @param string $value Cell value to process
* @param string $delimiter Character to put between cells on the same row
* @param string|null $value Cell value to process
* @param string $delimiter Character to put between cells on the same row
*
* @return string Processed value
*/
protected function _enclose_value($value = null, $delimiter) {
protected function _enclose_value($value, $delimiter) {
if ($value !== null && $value != '') {
$delimiter_quoted = $delimiter ?
preg_quote($delimiter, '/') . "|"

View File

@@ -51,7 +51,7 @@ trait DatatypeTrait {
*/
public function getDatatypes() {
if (empty($this->data)) {
$this->data = $this->parse_string();
$this->data = $this->_parse_string();
}
if (!is_array($this->data)) {
throw new \UnexpectedValueException('No data set yet.');

View File

@@ -88,7 +88,7 @@ class ParseTest extends TestCase {
$sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\"";
$expected_data = [86545235689, 34365587654, 13469874576];
$actual_data = $this->invokeMethod($this->csv, 'parse_string', array($sInput));
$actual_data = $this->invokeMethod($this->csv, '_parse_string', array($sInput));
$actual_column = array_map('reset', $actual_data);
$this->assertEquals($expected_data, $actual_column);
$this->assertEquals([
@@ -129,6 +129,38 @@ class ParseTest extends TestCase {
], $aCity);
}
/**
 * Checks that CSV data handed over as a string (as opposed to a file path)
 * is parsed correctly when it starts with a byte order mark.
 */
public function testStringWithLeadingBOM() {
    // Load the fixture and strip its "sep=;" row from the contents.
    $fixture_path = __DIR__ . '/../example_files/UTF-8_with_BOM_and_sep_row.csv';
    $raw_csv = strtr(file_get_contents($fixture_path), ["sep=;\n" => '']);

    // Sanity check: the first byte must still be 0xEF, the lead byte of a
    // UTF-8 BOM, otherwise the rest of this test would prove nothing.
    self::assertSame(0xEF, ord($raw_csv));

    $this->csv->output_encoding = 'UTF-8';
    $this->csv->delimiter = ';';

    // Route the string through load_data() and then parse what it stored.
    self::assertTrue($this->csv->load_data($raw_csv));
    self::assertTrue($this->csv->parse($this->csv->file_data));

    // If load_data() had left the BOM in place, the 'title' heading would
    // carry 3 extra bytes and this comparison would fail.
    $first_row = reset($this->csv->data);
    $expected_headings = [
        'title',
        'isbn',
        'publishedAt',
    ];
    $this->assertEquals($expected_headings, array_keys($first_row));

    $expected_titles = [
        'Красивая кулинария',
        'The Wine Connoisseurs',
        'Weißwein',
    ];
    $this->assertEquals($expected_titles, array_column($this->csv->data, 'title'));
}
public function testWithMultipleNewlines() {
$this->csv->auto(__DIR__ . '/../example_files/multiple_empty_lines.csv');
$aElse9 = array_column($this->csv->data, 'else9');