From 657cec4b4eeed70e8eb84979786165b8fa40853a Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Thu, 22 Feb 2018 20:41:03 +0100 Subject: [PATCH 01/34] added enum for sort --- src/Csv.php | 12 +++--------- src/enums/SortEnum.php | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 9 deletions(-) create mode 100644 src/enums/SortEnum.php diff --git a/src/Csv.php b/src/Csv.php index 74381e4..f75bb0d 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -1,6 +1,7 @@ titles = $head; if (!empty($this->sort_by)) { - $sort_type = SORT_REGULAR; - if ($this->sort_type == 'numeric') { - $sort_type = SORT_NUMERIC; - } elseif ($this->sort_type == 'string') { - $sort_type = SORT_STRING; - } - - $this->sort_reverse ? krsort($rows, $sort_type) : ksort($rows, $sort_type); + $this->sort_reverse ? krsort($rows, $this->sort_type) : ksort($rows, $this->sort_type); if ($this->offset !== null || $this->limit !== null) { $rows = array_slice($rows, ($this->offset === null ? 0 : $this->offset), $this->limit, true); diff --git a/src/enums/SortEnum.php b/src/enums/SortEnum.php new file mode 100644 index 0000000..01a37b0 --- /dev/null +++ b/src/enums/SortEnum.php @@ -0,0 +1,18 @@ + Date: Thu, 22 Feb 2018 21:01:06 +0100 Subject: [PATCH 02/34] init implementation of abstract enum class --- src/enums/AbstractEnum.php | 40 ++++++++++++++++++++++++++++++++++++++ src/enums/DatatypeEnum.php | 2 +- src/enums/SortEnum.php | 9 +-------- 3 files changed, 42 insertions(+), 9 deletions(-) create mode 100644 src/enums/AbstractEnum.php diff --git a/src/enums/AbstractEnum.php b/src/enums/AbstractEnum.php new file mode 100644 index 0000000..aae78e1 --- /dev/null +++ b/src/enums/AbstractEnum.php @@ -0,0 +1,40 @@ +isValid($value)) { + throw new \UnexpectedValueException("Value '$value' is not part of the enum " . 
get_called_class()); + } + $this->value = $value; + } + + public static function getConstants(){ + $class = get_called_class(); + $reflection = new \ReflectionClass($class); + + return $reflection->getConstants(); + } + + /** + * Check if enum value is valid + * + * @param $value + * + * @return bool + */ + public static function isValid($value) + { + return in_array($value, static::getConstants(), true); + } +} diff --git a/src/enums/DatatypeEnum.php b/src/enums/DatatypeEnum.php index 8fea47d..7f490e9 100644 --- a/src/enums/DatatypeEnum.php +++ b/src/enums/DatatypeEnum.php @@ -9,7 +9,7 @@ namespace ParseCsv\enums; * * todo: needs a basic parent enum class for error handling. */ -class DatatypeEnum { +class DatatypeEnum extends AbstractEnum { const __DEFAULT = self::TYPE_STRING; diff --git a/src/enums/SortEnum.php b/src/enums/SortEnum.php index 01a37b0..e5b78f3 100644 --- a/src/enums/SortEnum.php +++ b/src/enums/SortEnum.php @@ -1,15 +1,8 @@ Date: Thu, 22 Feb 2018 21:15:00 +0100 Subject: [PATCH 03/34] added test for sort enums (todo: handle exception on test) --- src/enums/SortEnum.php | 2 ++ tests/properties/PublicPropertiesTest.php | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/enums/SortEnum.php b/src/enums/SortEnum.php index e5b78f3..42626ef 100644 --- a/src/enums/SortEnum.php +++ b/src/enums/SortEnum.php @@ -3,6 +3,8 @@ namespace ParseCsv\enums; class SortEnum extends AbstractEnum { + const __DEFAULT = self::SORT_TYPE_REGULAR; + const SORT_TYPE_REGULAR = SORT_REGULAR; const SORT_TYPE_NUMERIC = SORT_NUMERIC; diff --git a/tests/properties/PublicPropertiesTest.php b/tests/properties/PublicPropertiesTest.php index fbf85d5..0510e73 100644 --- a/tests/properties/PublicPropertiesTest.php +++ b/tests/properties/PublicPropertiesTest.php @@ -3,6 +3,7 @@ namespace ParseCsv\tests\properties; use ParseCsv\Csv; +use ParseCsv\enums\SortEnum; use PHPUnit\Framework\TestCase; class PublicPropertiesTest extends TestCase { @@ -145,4 +146,20 @@ class 
PublicPropertiesTest extends TestCase { $this->assertCount($counter, $this->properties); } + + public function testDefaultSortTypeIsRegular(){ + $this->assertEquals(SortEnum::SORT_TYPE_REGULAR, $this->csv->sort_type); + } + + public function testSetSortType(){ + $this->csv->sort_type = SortEnum::SORT_TYPE_NUMERIC; + $this->assertEquals(SortEnum::SORT_TYPE_NUMERIC, $this->csv->sort_type); + + $this->csv->sort_type = SortEnum::SORT_TYPE_STRING; + $this->assertEquals(SortEnum::SORT_TYPE_STRING, $this->csv->sort_type); + + $this->csv->sort_type = SortEnum::SORT_TYPE_UNKNOWN; + // todo: how to handle this exception? + $this->expectException(InvalidArgumentException::class); + } } From 958af1027e18cda627341f059ddf3f1696ec4722 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Thu, 22 Feb 2018 21:34:39 +0100 Subject: [PATCH 04/34] small code improvements --- src/Csv.php | 86 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index f75bb0d..70bbbcf 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -332,13 +332,35 @@ class Csv { * Constructor * Class constructor * - * @param string|null $input The CSV string or a direct filepath - * @param integer|null $offset Number of rows to ignore from the beginning of the data - * @param integer|null $limit Limits the number of returned rows to specified amount - * @param string|null $conditions Basic SQL-like conditions for row matching - * @param null|true $keep_file_data Keep raw file data in memory after successful parsing (useful for debugging) + * @param string|null $input The CSV string or a direct filepath + * @param integer|null $offset Number of rows to ignore from the beginning + * of the data + * @param integer|null $limit Limits the number of returned rows to + * specified amount + * @param string|null $conditions Basic SQL-like conditions for row + * matching + * @param null|true $keep_file_data Keep raw file data in memory after + * 
successful parsing (useful for debugging) */ - public function __construct($input = null, $offset = null, $limit = null, $conditions = null, $keep_file_data = null) { + public function __construct($input = NULL, $offset = NULL, $limit = NULL, $conditions = NULL, $keep_file_data = NULL) { + $this->init($offset, $limit, $conditions, $keep_file_data); + + if (!empty($input)) { + $this->parse($input); + } + } + + /** + * @param integer|null $offset Number of rows to ignore from the beginning + * of the data + * @param integer|null $limit Limits the number of returned rows to + * specified amount + * @param string|null $conditions Basic SQL-like conditions for row + * matching + * @param null|true $keep_file_data Keep raw file data in memory after + * successful parsing (useful for debugging) + */ + public function init($offset = NULL, $limit = NULL, $conditions = NULL, $keep_file_data = NULL) { if (!is_null($offset)) { $this->offset = $offset; } @@ -354,10 +376,6 @@ class Csv { if (!is_null($keep_file_data)) { $this->keep_file_data = $keep_file_data; } - - if (!empty($input)) { - $this->parse($input); - } } // ============================================== @@ -375,37 +393,31 @@ class Csv { * * @return bool True on success */ - public function parse($input = null, $offset = null, $limit = null, $conditions = null) { - if (is_null($input)) { - $input = $this->file; + public function parse($input = NULL, $offset = NULL, $limit = NULL, $conditions = NULL) { + if (!is_null($input)) { + $this->file = $input; } - if (!empty($input)) { - if (!is_null($offset)) { - $this->offset = $offset; - } - - if (!is_null($limit)) { - $this->limit = $limit; - } - - if (!is_null($conditions)) { - $this->conditions = $conditions; - } - - if (strlen($input) <= PHP_MAXPATHLEN && is_readable($input)) { - $this->data = $this->parse_file($input); - } else { - $this->file_data = &$input; - $this->data = $this->parse_string(); - } - - if ($this->data === false) { - return false; - } + if 
(empty($this->file)) { + // todo: but why true? + return true; + } + + $this->init($offset, $limit, $conditions); + + + if (strlen($this->file) <= PHP_MAXPATHLEN && is_readable($this->file)) { + $this->data = $this->parse_file($this->file); + } + else { + $this->file_data = &$this->file; + $this->data = $this->parse_string(); + } + + if ($this->data === false) { + return false; } - return true; } /** From cf91bf40ffbcdd341e301c6f5c5ec1a66a4c0f46 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Fri, 23 Feb 2018 08:11:56 +0100 Subject: [PATCH 05/34] now compatible with old sorting values --- src/Csv.php | 3 ++- src/enums/SortEnum.php | 21 ++++++++++++++++++--- tests/properties/PublicPropertiesTest.php | 16 +++++++++++----- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index 70bbbcf..7a8c2f9 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -739,7 +739,8 @@ class Csv { $this->titles = $head; if (!empty($this->sort_by)) { - $this->sort_reverse ? krsort($rows, $this->sort_type) : ksort($rows, $this->sort_type); + $sort_type = SortEnum::getSorting($this->sort_type); + $this->sort_reverse ? krsort($rows, $sort_type) : ksort($rows, $sort_type); if ($this->offset !== null || $this->limit !== null) { $rows = array_slice($rows, ($this->offset === null ? 
0 : $this->offset), $this->limit, true); diff --git a/src/enums/SortEnum.php b/src/enums/SortEnum.php index 42626ef..54c3040 100644 --- a/src/enums/SortEnum.php +++ b/src/enums/SortEnum.php @@ -5,9 +5,24 @@ namespace ParseCsv\enums; class SortEnum extends AbstractEnum { const __DEFAULT = self::SORT_TYPE_REGULAR; - const SORT_TYPE_REGULAR = SORT_REGULAR; + const SORT_TYPE_REGULAR = 'regular'; - const SORT_TYPE_NUMERIC = SORT_NUMERIC; + const SORT_TYPE_NUMERIC = 'numeric'; + + const SORT_TYPE_STRING = 'string'; + + private static $sorting = array( + self::SORT_TYPE_REGULAR => SORT_REGULAR, + self::SORT_TYPE_STRING => SORT_STRING, + self::SORT_TYPE_NUMERIC => SORT_NUMERIC + ); + + public static function getSorting($type){ + if (array_key_exists($type, self::$sorting)){ + return self::$sorting[$type]; + } + + return self::$sorting[self::__DEFAULT]; + } - const SORT_TYPE_STRING = SORT_STRING; } diff --git a/tests/properties/PublicPropertiesTest.php b/tests/properties/PublicPropertiesTest.php index 0510e73..ed49354 100644 --- a/tests/properties/PublicPropertiesTest.php +++ b/tests/properties/PublicPropertiesTest.php @@ -152,14 +152,20 @@ class PublicPropertiesTest extends TestCase { } public function testSetSortType(){ - $this->csv->sort_type = SortEnum::SORT_TYPE_NUMERIC; + $this->csv->sort_type = 'numeric'; $this->assertEquals(SortEnum::SORT_TYPE_NUMERIC, $this->csv->sort_type); - $this->csv->sort_type = SortEnum::SORT_TYPE_STRING; + $this->csv->sort_type = 'string'; $this->assertEquals(SortEnum::SORT_TYPE_STRING, $this->csv->sort_type); + } - $this->csv->sort_type = SortEnum::SORT_TYPE_UNKNOWN; - // todo: how to handle this exception? 
- $this->expectException(InvalidArgumentException::class); + public function testGetSorting(){ + $this->csv->sort_type = 'numeric'; + $sorting = SortEnum::getSorting($this->csv->sort_type); + $this->assertEquals(SORT_NUMERIC, $sorting); + + $this->csv->sort_type = 'string'; + $sorting = SortEnum::getSorting($this->csv->sort_type); + $this->assertEquals(SORT_STRING, $sorting); } } From 343c683077199b56f5fe88aaf8d1bbb89ac5c34e Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Fri, 23 Feb 2018 10:02:13 +0100 Subject: [PATCH 06/34] corrected test for default sort type. Is set to regular now --- tests/properties/DefaultValuesPropertiesTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/properties/DefaultValuesPropertiesTest.php b/tests/properties/DefaultValuesPropertiesTest.php index 803afcc..364f7cd 100644 --- a/tests/properties/DefaultValuesPropertiesTest.php +++ b/tests/properties/DefaultValuesPropertiesTest.php @@ -57,7 +57,7 @@ class DefaultValuesPropertiesTest extends TestCase { } public function test_sort_type_default() { - $this->assertNull($this->csv->sort_type); + $this->assertEquals('regular', $this->csv->sort_type); } public function test_delimiter_default() { From 249e5a24ac99fdbe6331af1577f87ce5760a1586 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Fri, 23 Feb 2018 10:14:01 +0100 Subject: [PATCH 07/34] readded missing return statement in parse-function --- src/Csv.php | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index 7a8c2f9..e2160b6 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -394,11 +394,11 @@ class Csv { * @return bool True on success */ public function parse($input = NULL, $offset = NULL, $limit = NULL, $conditions = NULL) { - if (!is_null($input)) { - $this->file = $input; + if (is_null($input)) { + $input = $this->file; } - if (empty($this->file)) { + if (empty($input)) { // todo: but why true? 
return true; } @@ -406,11 +406,12 @@ class Csv { $this->init($offset, $limit, $conditions); - if (strlen($this->file) <= PHP_MAXPATHLEN && is_readable($this->file)) { - $this->data = $this->parse_file($this->file); + if (strlen($input) <= PHP_MAXPATHLEN && is_readable($input)) { + $this->file = $input; + $this->data = $this->parse_file($input); } else { - $this->file_data = &$this->file; + $this->file_data = &$input; $this->data = $this->parse_string(); } @@ -418,6 +419,8 @@ class Csv { return false; } + return true; + } /** From 95521cde8747560f447e808e97fd9443ce86434d Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Fri, 23 Feb 2018 10:17:09 +0100 Subject: [PATCH 08/34] reset file property if input is string --- src/Csv.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Csv.php b/src/Csv.php index e2160b6..129fe77 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -411,6 +411,7 @@ class Csv { $this->data = $this->parse_file($input); } else { + $this->file = null; $this->file_data = &$input; $this->data = $this->parse_string(); } From f8fe4cad033b1bd38b3b1bafeeb4697a855f2167 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Fri, 23 Feb 2018 10:37:12 +0100 Subject: [PATCH 09/34] change accessibility of parse_file and parse_string --- src/Csv.php | 6 +++--- tests/methods/ParseTest.php | 21 ++++++++++++++++++++- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index 129fe77..5fe15be 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -408,7 +408,7 @@ class Csv { if (strlen($input) <= PHP_MAXPATHLEN && is_readable($input)) { $this->file = $input; - $this->data = $this->parse_file($input); + $this->data = $this->parse_file(); } else { $this->file = null; @@ -569,7 +569,7 @@ class Csv { * * @return array|bool */ - public function parse_file($file = null) { + protected function parse_file($file = null) { if (is_null($file)) { $file = $this->file; } @@ -592,7 +592,7 @@ class Csv { * * @return array|false - 2D array with CSV data, 
or false on failure */ - public function parse_string($data = null) { + protected function parse_string($data = null) { if (empty($data)) { if ($this->_check_data()) { $data = &$this->file_data; diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index 9566499..6bf683c 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -89,7 +89,8 @@ class ParseTest extends TestCase $this->csv->enclosure = '"'; $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; $expected_data = [86545235689, 34365587654, 13469874576]; - $actual_data = $this->csv->parse_string($sInput); + + $actual_data = $this->invokeMethod($this->csv, 'parse_string', array($sInput)); $actual_column = array_map('reset', $actual_data); $this->assertEquals($expected_data, $actual_column); $this->assertEquals([ @@ -198,4 +199,22 @@ class ParseTest extends TestCase $this->assertArrayHasKey('column1', $csv->data[0], 'Data parsed incorrectly with enclosure ' . $enclosure); $this->assertEquals('value1', $csv->data[0]['column1'], 'Data parsed incorrectly with enclosure ' . $enclosure); } + + /** + * Call protected/private method of a class. + * + * @param object &$object Instantiated object that we will run method on. + * @param string $methodName Method name to call + * @param array $parameters Array of parameters to pass into method. + * + * @return mixed Method return. 
+ */ + private function invokeMethod(&$object, $methodName, array $parameters = array()) + { + $reflection = new \ReflectionClass(get_class($object)); + $method = $reflection->getMethod($methodName); + $method->setAccessible(true); + + return $method->invokeArgs($object, $parameters); + } } From c9cc9697efdf0c0930486ec298f2c7008b861273 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Sat, 24 Feb 2018 16:55:45 +0100 Subject: [PATCH 10/34] new feature: getting total data row count without parsing all data --- src/Csv.php | 35 +++++++++++++++++++++++++++++++++++ tests/methods/ParseTest.php | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/src/Csv.php b/src/Csv.php index 74381e4..bed8292 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -540,6 +540,41 @@ class Csv { return $this->delimiter; } + /** + * Get total number of rows in csv without parsing whole data. + * + * @return bool|int + */ + public function getTotalRowCount(){ + if (empty($this->file_data)){ + return false; + } + + $this->_detect_and_remove_sep_row_from_data($this->file_data); + + $pattern = sprintf('/("[^%s]*")|[^%s]*/i',$this->enclosure, $this->enclosure); + preg_match_all($pattern,$this->file_data, $matches); + + foreach ($matches[0] as $match){ + if (empty($match) || !preg_match("/{$this->enclosure}/", $match)){ + continue; + } + + $replace = str_replace(["\r", "\n"], '', $match); + $this->file_data = str_replace($match, $replace, $this->file_data); + } + + $headingRow = $this->heading ? 
1 : 0; + + $count = substr_count($this->file_data, "\r") + + substr_count($this->file_data, "\n") + - substr_count($this->file_data, "\r\n") + - $headingRow; + + + return $count; + } + // ============================================== // ----- [ Core Functions ] --------------------- // ============================================== diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index 9566499..c81876a 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -198,4 +198,41 @@ class ParseTest extends TestCase $this->assertArrayHasKey('column1', $csv->data[0], 'Data parsed incorrectly with enclosure ' . $enclosure); $this->assertEquals('value1', $csv->data[0]['column1'], 'Data parsed incorrectly with enclosure ' . $enclosure); } + + public function countRowsProvider(){ + return [ + 'auto-double-enclosure' => [ + 'auto-double-enclosure.csv', + 2 + ], + 'auto-single-enclosure' => [ + 'auto-single-enclosure.csv', + 2 + ], + 'UTF-8_sep_row' => [ + 'datatype.csv', + 3 + ] + ]; + } + + /** + * @dataProvider countRowsProvider + * + * @param string $file + * @param int $expectedRows + */ + public function testGetTotalRowCountFromFile($file, $expectedRows){ + $this->csv->heading = true; + $this->csv->load_data(__DIR__ . '/fixtures/' . 
$file); + $this->assertEquals($expectedRows, $this->csv->getTotalRowCount()); + } + + public function testGetTotalRowCountMissingEndingLineBreak(){ + $this->csv->heading = false; + $this->csv->enclosure = '"'; + $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; + $this->csv->load_data($sInput); + $this->assertEquals(3, $this->csv->getTotalRowCount()); + } } From ba4cc0672a9e97038365354ffdebb0cfb350a975 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 06:57:40 +0100 Subject: [PATCH 11/34] reformat code --- src/Csv.php | 63 +++++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index 5fe15be..24c0947 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -1,4 +1,5 @@ init($offset, $limit, $conditions, $keep_file_data); if (!empty($input)) { @@ -351,16 +352,16 @@ class Csv { } /** - * @param integer|null $offset Number of rows to ignore from the beginning - * of the data - * @param integer|null $limit Limits the number of returned rows to - * specified amount - * @param string|null $conditions Basic SQL-like conditions for row - * matching - * @param null|true $keep_file_data Keep raw file data in memory after - * successful parsing (useful for debugging) + * @param integer|null $offset Number of rows to ignore from the beginning + * of the data + * @param integer|null $limit Limits the number of returned rows to + * specified amount + * @param string|null $conditions Basic SQL-like conditions for row + * matching + * @param null|true $keep_file_data Keep raw file data in memory after + * successful parsing (useful for debugging) */ - public function init($offset = NULL, $limit = NULL, $conditions = NULL, $keep_file_data = NULL) { + public function init($offset = null, $limit = null, $conditions = null, $keep_file_data = null) { if (!is_null($offset)) { $this->offset = $offset; } @@ -393,7 +394,7 @@ class Csv { * * @return bool True on success */ - 
public function parse($input = NULL, $offset = NULL, $limit = NULL, $conditions = NULL) { + public function parse($input = null, $offset = null, $limit = null, $conditions = null) { if (is_null($input)) { $input = $this->file; } @@ -409,8 +410,7 @@ class Csv { if (strlen($input) <= PHP_MAXPATHLEN && is_readable($input)) { $this->file = $input; $this->data = $this->parse_file(); - } - else { + } else { $this->file = null; $this->file_data = &$input; $this->data = $this->parse_string(); @@ -926,12 +926,19 @@ class Csv { */ protected function _validate_row_condition($row, $condition) { $operators = array( - '=', 'equals', 'is', - '!=', 'is not', - '<', 'is less than', - '>', 'is greater than', - '<=', 'is less than or equals', - '>=', 'is greater than or equals', + '=', + 'equals', + 'is', + '!=', + 'is not', + '<', + 'is less than', + '>', + 'is greater than', + '<=', + 'is less than or equals', + '>=', + 'is greater than or equals', 'contains', 'does not contain', ); From b6247c367c8142381208b117f5daf83f410d5194 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:31:46 +0100 Subject: [PATCH 12/34] reformat code; only extended comment for new function --- src/Csv.php | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index bed8292..5a07658 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -1,4 +1,5 @@ file_data)){ + public function getTotalRowCount() { + if (empty($this->file_data)) { return false; } $this->_detect_and_remove_sep_row_from_data($this->file_data); - $pattern = sprintf('/("[^%s]*")|[^%s]*/i',$this->enclosure, $this->enclosure); - preg_match_all($pattern,$this->file_data, $matches); + $pattern = sprintf('/("[^%s]*")|[^%s]*/i', $this->enclosure, $this->enclosure); + preg_match_all($pattern, $this->file_data, $matches); - foreach ($matches[0] as $match){ - if (empty($match) || !preg_match("/{$this->enclosure}/", $match)){ + foreach ($matches[0] as $match) { + if 
(empty($match) || !preg_match("/{$this->enclosure}/", $match)) { continue; } @@ -950,12 +952,19 @@ class Csv { */ protected function _validate_row_condition($row, $condition) { $operators = array( - '=', 'equals', 'is', - '!=', 'is not', - '<', 'is less than', - '>', 'is greater than', - '<=', 'is less than or equals', - '>=', 'is greater than or equals', + '=', + 'equals', + 'is', + '!=', + 'is not', + '<', + 'is less than', + '>', + 'is greater than', + '<=', + 'is less than or equals', + '>=', + 'is greater than or equals', 'contains', 'does not contain', ); From 9e5c97328d88fd6d8f80f44a7ddb779dd79f767b Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:33:09 +0100 Subject: [PATCH 13/34] renamed function to getTotalDataRowCount --- src/Csv.php | 2 +- tests/methods/ParseTest.php | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index 5a07658..49ba372 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -547,7 +547,7 @@ class Csv { * * @return bool|int */ - public function getTotalRowCount() { + public function getTotalDataRowCount() { if (empty($this->file_data)) { return false; } diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index c81876a..5851438 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -225,7 +225,7 @@ class ParseTest extends TestCase public function testGetTotalRowCountFromFile($file, $expectedRows){ $this->csv->heading = true; $this->csv->load_data(__DIR__ . '/fixtures/' . 
$file); - $this->assertEquals($expectedRows, $this->csv->getTotalRowCount()); + $this->assertEquals($expectedRows, $this->csv->getTotalDataRowCount()); } public function testGetTotalRowCountMissingEndingLineBreak(){ @@ -233,6 +233,6 @@ class ParseTest extends TestCase $this->csv->enclosure = '"'; $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; $this->csv->load_data($sInput); - $this->assertEquals(3, $this->csv->getTotalRowCount()); + $this->assertEquals(3, $this->csv->getTotalDataRowCount()); } } From e5eccf1fc1d26e4699128776671fa6fab9786c12 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:38:08 +0100 Subject: [PATCH 14/34] put tests into new file --- tests/methods/DataRowCountTest.php | 66 ++++++++++++++++++++++++++++++ tests/methods/ParseTest.php | 37 ----------------- 2 files changed, 66 insertions(+), 37 deletions(-) create mode 100644 tests/methods/DataRowCountTest.php diff --git a/tests/methods/DataRowCountTest.php b/tests/methods/DataRowCountTest.php new file mode 100644 index 0000000..a1fb0e2 --- /dev/null +++ b/tests/methods/DataRowCountTest.php @@ -0,0 +1,66 @@ +csv = new Csv(); + } + + public function countRowsProvider() { + return [ + 'auto-double-enclosure' => [ + 'auto-double-enclosure.csv', + 2, + ], + 'auto-single-enclosure' => [ + 'auto-single-enclosure.csv', + 2, + ], + 'UTF-8_sep_row' => [ + 'datatype.csv', + 3, + ], + ]; + } + + /** + * @dataProvider countRowsProvider + * + * @param string $file + * @param int $expectedRows + */ + public function testGetTotalRowCountFromFile($file, $expectedRows) { + $this->csv->heading = true; + $this->csv->load_data(__DIR__ . '/fixtures/' . 
$file); + $this->assertEquals($expectedRows, $this->csv->getTotalDataRowCount()); + } + + public function testGetTotalRowCountMissingEndingLineBreak() { + $this->csv->heading = false; + $this->csv->enclosure = '"'; + $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; + $this->csv->load_data($sInput); + $this->assertEquals(3, $this->csv->getTotalDataRowCount()); + } +} diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index 5851438..9566499 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -198,41 +198,4 @@ class ParseTest extends TestCase $this->assertArrayHasKey('column1', $csv->data[0], 'Data parsed incorrectly with enclosure ' . $enclosure); $this->assertEquals('value1', $csv->data[0]['column1'], 'Data parsed incorrectly with enclosure ' . $enclosure); } - - public function countRowsProvider(){ - return [ - 'auto-double-enclosure' => [ - 'auto-double-enclosure.csv', - 2 - ], - 'auto-single-enclosure' => [ - 'auto-single-enclosure.csv', - 2 - ], - 'UTF-8_sep_row' => [ - 'datatype.csv', - 3 - ] - ]; - } - - /** - * @dataProvider countRowsProvider - * - * @param string $file - * @param int $expectedRows - */ - public function testGetTotalRowCountFromFile($file, $expectedRows){ - $this->csv->heading = true; - $this->csv->load_data(__DIR__ . '/fixtures/' . 
$file); - $this->assertEquals($expectedRows, $this->csv->getTotalDataRowCount()); - } - - public function testGetTotalRowCountMissingEndingLineBreak(){ - $this->csv->heading = false; - $this->csv->enclosure = '"'; - $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; - $this->csv->load_data($sInput); - $this->assertEquals(3, $this->csv->getTotalDataRowCount()); - } } From 611b1a92e85ddf27cc3186bd1763755996fdf892 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:46:41 +0100 Subject: [PATCH 15/34] use strpos instead of preg_match --- src/Csv.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Csv.php b/src/Csv.php index 49ba372..b4b2e1d 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -558,7 +558,7 @@ class Csv { preg_match_all($pattern, $this->file_data, $matches); foreach ($matches[0] as $match) { - if (empty($match) || !preg_match("/{$this->enclosure}/", $match)) { + if (empty($match) || (strpos($match, $this->enclosure) === false)) { continue; } From aaefe2a480b853aab4fdb3a909ca2d555350610a Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:50:35 +0100 Subject: [PATCH 16/34] introduces new local variable that holds the data --- src/Csv.php | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index b4b2e1d..297f463 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -552,10 +552,12 @@ class Csv { return false; } - $this->_detect_and_remove_sep_row_from_data($this->file_data); + $data = $this->file_data; + + $this->_detect_and_remove_sep_row_from_data($data); $pattern = sprintf('/("[^%s]*")|[^%s]*/i', $this->enclosure, $this->enclosure); - preg_match_all($pattern, $this->file_data, $matches); + preg_match_all($pattern, $data, $matches); foreach ($matches[0] as $match) { if (empty($match) || (strpos($match, $this->enclosure) === false)) { @@ -563,14 +565,14 @@ class Csv { } $replace = str_replace(["\r", "\n"], '', $match); - 
$this->file_data = str_replace($match, $replace, $this->file_data); + $data = str_replace($match, $replace, $data); } $headingRow = $this->heading ? 1 : 0; - $count = substr_count($this->file_data, "\r") - + substr_count($this->file_data, "\n") - - substr_count($this->file_data, "\r\n") + $count = substr_count($data, "\r") + + substr_count($data, "\n") + - substr_count($data, "\r\n") - $headingRow; From 68b849a37bb5d8bf7f54a3e35b9631b28b66b41a Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:55:51 +0100 Subject: [PATCH 17/34] corrected regex to fit all given enclosures. Added test for single enclosure --- src/Csv.php | 2 +- tests/methods/DataRowCountTest.php | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Csv.php b/src/Csv.php index 297f463..5f98d79 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -556,7 +556,7 @@ class Csv { $this->_detect_and_remove_sep_row_from_data($data); - $pattern = sprintf('/("[^%s]*")|[^%s]*/i', $this->enclosure, $this->enclosure); + $pattern = sprintf('/(%1$s[^%1$s]*%1$s)/i', $this->enclosure); preg_match_all($pattern, $data, $matches); foreach ($matches[0] as $match) { diff --git a/tests/methods/DataRowCountTest.php b/tests/methods/DataRowCountTest.php index a1fb0e2..693d736 100644 --- a/tests/methods/DataRowCountTest.php +++ b/tests/methods/DataRowCountTest.php @@ -63,4 +63,14 @@ class DataRowCountTest extends TestCase { $this->csv->load_data($sInput); $this->assertEquals(3, $this->csv->getTotalDataRowCount()); } + + + public function testGetTotalRowCountSingleEnclosure() { + $this->csv->heading = false; + $this->csv->enclosure = "'"; + $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\'c\r\nd\'"; + $this->csv->load_data($sInput); + $this->assertEquals(3, $this->csv->getTotalDataRowCount()); + } + } From 951fc68886e22f2ef0be987c67114b86da66e9cb Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 09:46:44 +0100 Subject: [PATCH 18/34] new feature: auto detect 
if parsed file has heading --- src/extensions/DatatypeTrait.php | 40 +++++++++++++++++++++++++++++++- tests/methods/ParseTest.php | 20 ++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/src/extensions/DatatypeTrait.php b/src/extensions/DatatypeTrait.php index 3e66405..0c4bdb2 100644 --- a/src/extensions/DatatypeTrait.php +++ b/src/extensions/DatatypeTrait.php @@ -2,6 +2,8 @@ namespace ParseCsv\extensions; +use ParseCsv\enums\DatatypeEnum; + trait DatatypeTrait { /** @@ -47,7 +49,7 @@ trait DatatypeTrait { * * @access public * - * @uses getDatatypeFromString + * @uses DatatypeEnum::getValidTypeFromSample * * @return array|bool */ @@ -71,4 +73,40 @@ trait DatatypeTrait { return !empty($this->data_types) ? $this->data_types : []; } + + /** + * Check data type of titles / first row for auto detecting if this could be + * a heading line. + * + * Requires PHP >= 5.5 + * + * @access public + * + * @uses DatatypeEnum::getValidTypeFromSample + * + * @return bool + */ + public function autoDetectFileHasHeading(){ + if (empty($this->data)){ + throw new \UnexpectedValueException('No data set yet.'); + } + + if ($this->heading){ + $firstRow = $this->titles; + } else { + $firstRow = $this->data[0]; + } + + if (empty(array_filter($firstRow))){ + return false; + } + + $firstRowDatatype = array_map('ParseCsv\enums\DatatypeEnum::getValidTypeFromSample', $firstRow); + + if ($this->getMostFrequentDatatypeForColumn($firstRowDatatype) !== DatatypeEnum::TYPE_STRING){ + return false; + } + + return true; + } } diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index 9566499..759565a 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -157,6 +157,26 @@ class ParseTest extends TestCase $this->assertEquals($expected, $this->csv->data_types); } + public function testAutoDetectFileHasHeading(){ + $this->csv->auto(__DIR__ . 
'/fixtures/datatype.csv'); + $this->assertTrue($this->csv->autoDetectFileHasHeading()); + + $this->csv->heading = false; + $this->csv->auto(__DIR__ . '/fixtures/datatype.csv'); + $this->assertTrue($this->csv->autoDetectFileHasHeading()); + + $this->csv->heading = false; + $sInput = "86545235689\r\n34365587654\r\n13469874576"; + $this->csv->auto($sInput); + $this->assertFalse($this->csv->autoDetectFileHasHeading()); + + $this->csv->heading = true; + $sInput = "86545235689\r\n34365587654\r\n13469874576"; + $this->csv->auto($sInput); + $this->assertFalse($this->csv->autoDetectFileHasHeading()); + + } + protected function _get_magazines_data() { return [ [ From 4bbc928f09ee72b67aa61303a9f5a7a17a904590 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 10:00:52 +0100 Subject: [PATCH 19/34] added dependency for test --- tests/methods/ParseTest.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index 759565a..161ec29 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -157,6 +157,9 @@ class ParseTest extends TestCase $this->assertEquals($expected, $this->csv->data_types); } + /** + * @depends testSepRowAutoDetection + */ public function testAutoDetectFileHasHeading(){ $this->csv->auto(__DIR__ . 
'/fixtures/datatype.csv'); $this->assertTrue($this->csv->autoDetectFileHasHeading()); From 5b1002a6774d56338d41a174f32868ec41577e04 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 10:06:59 +0100 Subject: [PATCH 20/34] test correction --- tests/methods/ParseTest.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index 161ec29..261f15b 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -161,6 +161,12 @@ class ParseTest extends TestCase * @depends testSepRowAutoDetection */ public function testAutoDetectFileHasHeading(){ + if (!function_exists('array_column')) { + // getDatatypes requires array_column, but that + // function is only available in PHP >= 5.5 + return; + } + $this->csv->auto(__DIR__ . '/fixtures/datatype.csv'); $this->assertTrue($this->csv->autoDetectFileHasHeading()); From fbe5263bca0baab99bc849af749b1e4434fc7e8f Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Tue, 27 Feb 2018 13:22:11 +0100 Subject: [PATCH 21/34] only code improvements --- src/extensions/DatatypeTrait.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/extensions/DatatypeTrait.php b/src/extensions/DatatypeTrait.php index 0c4bdb2..de9d92d 100644 --- a/src/extensions/DatatypeTrait.php +++ b/src/extensions/DatatypeTrait.php @@ -97,7 +97,8 @@ trait DatatypeTrait { $firstRow = $this->data[0]; } - if (empty(array_filter($firstRow))){ + $firstRow = array_filter($firstRow); + if (empty($firstRow)){ return false; } From 48a3cdbc5cfbf83fc71600101d5e607c20337cff Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Tue, 27 Feb 2018 14:18:00 +0100 Subject: [PATCH 22/34] new enum for file processing mode. 
extended documentation (comments #112) --- src/Csv.php | 9 +++++---- src/enums/FileProcessingModeEnum.php | 28 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 src/enums/FileProcessingModeEnum.php diff --git a/src/Csv.php b/src/Csv.php index 74381e4..d9209a0 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -1,6 +1,7 @@ titles would be used instead. * * @return bool */ - public function save($file = '', $data = array(), $append = false, $fields = array()) { + public function save($file = '', $data = array(), $append = FileProcessingModeEnum::MODE_FILE_OVERWRITE, $fields = array()) { if (empty($file)) { $file = &$this->file; } - $mode = $append ? 'ab' : 'wb'; + $mode = FileProcessingModeEnum::getAppendMode($append); $is_php = preg_match('/\.php$/i', $file) ? true : false; return $this->_wfile($file, $this->unparse($data, $fields, $append, $is_php), $mode); @@ -760,7 +761,7 @@ class Csv { * * @return string CSV data */ - public function unparse($data = array(), $fields = array(), $append = false, $is_php = false, $delimiter = null) { + public function unparse($data = array(), $fields = array(), $append = FileProcessingModeEnum::MODE_FILE_OVERWRITE, $is_php = false, $delimiter = null) { if (!is_array($data) || empty($data)) { $data = &$this->data; } diff --git a/src/enums/FileProcessingModeEnum.php b/src/enums/FileProcessingModeEnum.php new file mode 100644 index 0000000..ab88055 --- /dev/null +++ b/src/enums/FileProcessingModeEnum.php @@ -0,0 +1,28 @@ + Date: Tue, 27 Feb 2018 14:33:26 +0100 Subject: [PATCH 23/34] new test for setting new headers before save (comments #82) --- tests/methods/SaveTest.php | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/methods/SaveTest.php b/tests/methods/SaveTest.php index 6e43b97..335844c 100644 --- a/tests/methods/SaveTest.php +++ b/tests/methods/SaveTest.php @@ -49,6 +49,13 @@ class SaveTest extends TestCase $this->saveAndCompare($expected); } + public function 
testSaveWithNewHeader() { + $this->csv->linefeed = "\n"; + $this->csv->titles = array("NewTitle"); + $expected = "NewTitle\n0444\n5555\n"; + $this->saveAndCompare($expected); + } + public function testSaveWithoutHeader() { $this->csv->linefeed = "\n"; $this->csv->heading = false; From fb9325884d8c81d95f4d39d5bcce4675bb5259c3 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Wed, 28 Feb 2018 13:47:38 +0100 Subject: [PATCH 24/34] small code improvement --- src/extensions/DatatypeTrait.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/extensions/DatatypeTrait.php b/src/extensions/DatatypeTrait.php index de9d92d..475fc40 100644 --- a/src/extensions/DatatypeTrait.php +++ b/src/extensions/DatatypeTrait.php @@ -64,7 +64,7 @@ trait DatatypeTrait { $result = []; foreach ($this->titles as $cName) { $column = array_column($this->data, $cName); - $cDatatypes = array_map('ParseCsv\enums\DatatypeEnum::getValidTypeFromSample', $column); + $cDatatypes = array_map(DatatypeEnum::class . '::getValidTypeFromSample', $column); $result[$cName] = $this->getMostFrequentDatatypeForColumn($cDatatypes); } @@ -102,7 +102,7 @@ trait DatatypeTrait { return false; } - $firstRowDatatype = array_map('ParseCsv\enums\DatatypeEnum::getValidTypeFromSample', $firstRow); + $firstRowDatatype = array_map(DatatypeEnum::class . '::getValidTypeFromSample', $firstRow); if ($this->getMostFrequentDatatypeForColumn($firstRowDatatype) !== DatatypeEnum::TYPE_STRING){ return false; From da708386986e6f31f9ece91a2ae01305fc3ecf29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Bl=C3=A4ul?= Date: Fri, 2 Mar 2018 17:52:26 +0100 Subject: [PATCH 25/34] Only improved ChangeLog to include the output() change --- ChangeLog.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChangeLog.txt b/ChangeLog.txt index b552a65..c647607 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -8,6 +8,9 @@ Date: unreleased - Added support for MS Excel's "sep=" to detect the delimiter (Issue #60). 
+- MIME: output() sends correct MIME type to browser + if the separator is a tab tab (Issue #79) + - Added support for mb_convert_encoding() instead of iconv() - see issue #109 From a80a6f18621fd352c7341a9d168880550bf9fb05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Bl=C3=A4ul?= Date: Sat, 3 Mar 2018 11:55:06 +0100 Subject: [PATCH 26/34] Moved features section up because library selection comes before install --- README.md | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index f2960f6..dbf36d2 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,23 @@ and third-party support for handling CSV data in PHP. [csv]: http://en.wikipedia.org/wiki/Comma-separated_values +## Features + +* ParseCsv is a complete and fully featured CSV solution for PHP +* Supports enclosed values, enclosed commas, double quotes and new lines. +* Automatic delimiter character detection. +* Sort data by specific fields/columns. +* Easy data manipulation. +* Basic SQL-like _conditions_, _offset_ and _limit_ options for filtering + data. +* Error detection for incorrectly formatted input. It attempts to be + intelligent, but can not be trusted 100% due to the structure of CSV, and + how different programs like Excel for example outputs CSV data. +* Support for character encoding conversion using PHP's + `iconv()` and `mb_convert_encoding()` functions. +* Supports PHP 5.4 and higher. + It certainly works with PHP 7.2 and all versions in between. + ## Installation Installation is easy using Composer. Just run the following on the @@ -33,23 +50,6 @@ repository or extract the [ZIP](https://github.com/parsecsv/parsecsv-for-php/archive/master.zip). To use ParseCSV, you then have to add a `require 'parsecsv.lib.php';` line. -## Features - -* ParseCsv is a complete and fully featured CSV solution for PHP -* Supports enclosed values, enclosed commas, double quotes and new lines. -* Automatic delimiter character detection. 
-* Sort data by specific fields/columns. -* Easy data manipulation. -* Basic SQL-like _conditions_, _offset_ and _limit_ options for filtering - data. -* Error detection for incorrectly formatted input. It attempts to be - intelligent, but can not be trusted 100% due to the structure of CSV, and - how different programs like Excel for example outputs CSV data. -* Support for character encoding conversion using PHP's - `iconv()` and `mb_convert_encoding()` functions. -* Supports PHP 5.4 and higher. - It certainly works with PHP 7.2 and all versions in between. - ## Example Usage **General** From 4b6b7ee0b8b0b023cdda568205fdeb4b46929880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Bl=C3=A4ul?= Date: Sat, 3 Mar 2018 11:55:57 +0100 Subject: [PATCH 27/34] Removed -rc.2 from version, as it is officially released now --- ChangeLog.txt | 4 ++-- src/Csv.php | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index c647607..ee793ed 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -1,6 +1,6 @@ -ParseCSV 1.0.0-rc.2 +ParseCSV 1.0.0 ----------------------------------- -Date: unreleased +Date: 3-March-2018 - Renamed class from parseCSV to Csv and added name- space "ParseCsv" for PSR compliance. 
diff --git a/src/Csv.php b/src/Csv.php index 752350c..d433d75 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -7,7 +7,7 @@ use ParseCsv\extensions\DatatypeTrait; class Csv { /* - Class: ParseCSV 1.0.0-rc.2 + Class: ParseCSV 1.0.0 https://github.com/parsecsv/parsecsv-for-php Fully conforms to the specifications lined out on Wikipedia: From 9c14bc2f30c55687588d794ad2b145bf0ac56196 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Bl=C3=A4ul?= Date: Sat, 3 Mar 2018 12:01:45 +0100 Subject: [PATCH 28/34] Updated ChangeLog.txt with new features --- ChangeLog.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index ee793ed..601ccef 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -8,13 +8,18 @@ Date: 3-March-2018 - Added support for MS Excel's "sep=" to detect the delimiter (Issue #60). +- Added data type detection - function getDatatypes() + guesses the type of each column. + - MIME: output() sends correct MIME type to browser - if the separator is a tab tab (Issue #79) + if the separator is a tab tab (Issue #79). - Added support for mb_convert_encoding() instead of - iconv() - see issue #109 + iconv() - see issue #109. -- A number of minor bug fixes - see GitHub issues +- A number of minor bug fixes - see GitHub issues. + +- Added many more unit tests. ----------------------------------- From 7168cb15e3bca0619e7945f8d42b83e971bbd28c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Bl=C3=A4ul?= Date: Wed, 7 Mar 2018 09:40:43 +0100 Subject: [PATCH 29/34] ChangeLog.txt: Added new feature --- ChangeLog.txt | 554 +++++++++++++++++++++++++------------------------- 1 file changed, 282 insertions(+), 272 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index 601ccef..d592923 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -1,272 +1,282 @@ -ParseCSV 1.0.0 ------------------------------------ -Date: 3-March-2018 - -- Renamed class from parseCSV to Csv and added name- - space "ParseCsv" for PSR compliance. 
- -- Added support for MS Excel's "sep=" to detect the - delimiter (Issue #60). - -- Added data type detection - function getDatatypes() - guesses the type of each column. - -- MIME: output() sends correct MIME type to browser - if the separator is a tab tab (Issue #79). - -- Added support for mb_convert_encoding() instead of - iconv() - see issue #109. - -- A number of minor bug fixes - see GitHub issues. - -- Added many more unit tests. - ------------------------------------ - - -parseCSV 0.4.3 beta ------------------------------------ -Date: 1-July-2008 - -- Issue #4. Added an option for setting sorting - type behavior when sorting data. - Simply set $csv->sort_type to "regular", "numeric", - or "string". - -- Issue #6. Raw loaded file data is now cleared from - file_data property when it has been successfully - parsed to keep parseCSV's memory footprint to a - minimum. Specifically handy when using multiple - instances of parseCSV to process large files. - ------------------------------------ - - -parseCSV 0.4.2 beta ------------------------------------ -Date: 31-May-2008 - -- IMPORTANT! If you're using the output(), - method please note that the first parameter - has been completely removed as it was - technically just useless. Instead, the second - parameter (filename) doubles as its replacement. - Simply put, if filename is not set or null, the - output() method will not output a downloadable - file. Please update your existing code - when using 0.4.2 and later :) - -- Small fix to the headers sent by the output() - method. - -- Added a download example using the output() - method to the examples folder. - ------------------------------------ - - -parseCSV 0.4.1 beta ------------------------------------ -Date: 29-May-2008 - -- Fixed a small bug in how the output() method - handles input data. 
- ------------------------------------ - - -parseCSV 0.4 beta ------------------------------------ -Date: 11-Apr-2008 - -- Error reporting for files/data which is corrupt - or has formatting errors like using double - quotes in a field without enclosing quotes. Or - not escaping double quotes with a second one. - -- parse() method does not require input anymore - if the "$object->file" property has been set. - -I'm calling this a beta release due to the heavy -modifications to the core parsing logic required -for error reporting to work. I have tested the -new code quite extensively, I'm fairly confident -that it still parses exactly as it always has. - -The second reason I'm calling it a beta release -is cause I'm sure the error reporting code will -need more refinements and tweaks to detect more -types of errors, as it's only picking two types -or syntax errors right now. However, it seems -these two are the most common errors that you -would be likely to come across. - ------------------------------------ - - -parseCSV 0.3.2 ------------------------------------ -Date: 1-Apr-2008 - -This is primarily a bug-fix release for a critical -bug which was brought to my attention. - -- Fixed a critical bug in conditions parsing which - would generate corrupt matching patterns causing - the condition(s) to not work at all in some - situations. - -- Fixed a small code error which would cause PHP to - generate a invalid offset notice when zero length - values were fed into the unparse() method to - generate CSV data from an array. - -Notice: If you have been using the "parsecsv-stable" -branch as an external in any of your projects, -please use the "stable/parsecsv" branch from this -point on as I will eventually remove the former due -to it's stupid naming. - ------------------------------------ - - -parseCSV 0.3.1 ------------------------------------ -Date: 1-Sep-2007 - -- Small change to default output settings to - conform with RFC 4180 (http://rfc.net/rfc4180.html). 
- Only the LF (line feed) character was used - by default to separate rows, rather than - CRLF (carriage return & line feed). - ------------------------------------ - - -parseCSV 0.3.0 ------------------------------------ -Date: 9-Aug-2007 - -- Changed to the MIT license. - -- Added offset and limit options. - -- Added SQL-like conditions for quickly - filtering out entries. Documentation on the - condition syntax is forthcoming. - -- Small parsing modification to comply - with some recent changes to the specifications - outlined on Wikipedia's Comma-separated values - article. - -- Minor changes and optimizations, and a few - spelling corrections. Oops :) - -- Included more complex code examples in the - parseCSV download. - ------------------------------------ - - -parseCSV 0.2.1 ------------------------------------ -Date: 8-Aug-2007 - -- Fixed stupid code which caused auto function - to not work in some situations. - ------------------------------------ - - -parseCSV 0.2.0 beta ------------------------------------ -Date: 2-Jan-2007 - -- Added auto() function to automatically detect - delimiter character. - Useful for user upload in case delimiter is - comma (,), tab, or semi-colon (;). Some - versions of MS Excel for Windows use - semi-colons instead of commas when saving to - CSV files. - It uses a process of elimination to eliminate - characters that can not be the delimiter, - so it should work on all CSV-structured files - almost no matter what the delimiter is. - -- Generally updated some of the core workings - to increase performance, and offer better - support for large (1MB and up) files. - -- Added code examples to header comment. - ------------------------------------ - - -parseCSV 0.1.6 beta ------------------------------------ -Date: 22-Dec-2006 - -- Updated output() function. 
- ------------------------------------ - - -parseCSV 0.1.5 beta ------------------------------------ -Date: 22-Dec-2006 - -- Added output() function for easy output to - browser, for downloading features for example. - ------------------------------------ - - -parseCSV 0.1.4 beta ------------------------------------ -Date: 17-Dec-2006 - -- Minor changes and fixes - ------------------------------------ - - -parseCSV 0.1.3 beta ------------------------------------ -Date: 17-Dec-2006 - -- Added GPL v2.0 license. - ------------------------------------ - - -parseCSV 0.1.2 beta ------------------------------------ -Date: 17-Dec-2006 - -- Added encoding() function for easier character - encoding configuration. - ------------------------------------ - - -parseCSV 0.1.1 beta ------------------------------------ -Date: 24-Nov-2006 - -- Added support for a PHP die command on first - line of csv files if they have a .php extension - to protect secure data from being displayed - directly to the browser. - ------------------------------------ - - -parseCSV 0.1 beta ------------------------------------ -Date: 23-Nov-2006 - -- Initial release - ------------------------------------ +ParseCSV dev-master +----------------------------------- +Date: unreleased + +- New function getTotalDataRowCount() - useful if + $limit is set - see pull request #122. + +----------------------------------- + + +ParseCSV 1.0.0 +----------------------------------- +Date: 3-March-2018 + +- Renamed class from parseCSV to Csv and added name- + space "ParseCsv" for PSR compliance. + +- Added support for MS Excel's "sep=" to detect the + delimiter (Issue #60). + +- Added data type detection - function getDatatypes() + guesses the type of each column. + +- MIME: output() sends correct MIME type to browser + if the separator is a tab tab (Issue #79). + +- Added support for mb_convert_encoding() instead of + iconv() - see issue #109. + +- A number of minor bug fixes - see GitHub issues. 
+ +- Added many more unit tests. + +----------------------------------- + + +parseCSV 0.4.3 beta +----------------------------------- +Date: 1-July-2008 + +- Issue #4. Added an option for setting sorting + type behavior when sorting data. + Simply set $csv->sort_type to "regular", "numeric", + or "string". + +- Issue #6. Raw loaded file data is now cleared from + file_data property when it has been successfully + parsed to keep parseCSV's memory footprint to a + minimum. Specifically handy when using multiple + instances of parseCSV to process large files. + +----------------------------------- + + +parseCSV 0.4.2 beta +----------------------------------- +Date: 31-May-2008 + +- IMPORTANT! If you're using the output(), + method please note that the first parameter + has been completely removed as it was + technically just useless. Instead, the second + parameter (filename) doubles as its replacement. + Simply put, if filename is not set or null, the + output() method will not output a downloadable + file. Please update your existing code + when using 0.4.2 and later :) + +- Small fix to the headers sent by the output() + method. + +- Added a download example using the output() + method to the examples folder. + +----------------------------------- + + +parseCSV 0.4.1 beta +----------------------------------- +Date: 29-May-2008 + +- Fixed a small bug in how the output() method + handles input data. + +----------------------------------- + + +parseCSV 0.4 beta +----------------------------------- +Date: 11-Apr-2008 + +- Error reporting for files/data which is corrupt + or has formatting errors like using double + quotes in a field without enclosing quotes. Or + not escaping double quotes with a second one. + +- parse() method does not require input anymore + if the "$object->file" property has been set. + +I'm calling this a beta release due to the heavy +modifications to the core parsing logic required +for error reporting to work. 
I have tested the +new code quite extensively, I'm fairly confident +that it still parses exactly as it always has. + +The second reason I'm calling it a beta release +is cause I'm sure the error reporting code will +need more refinements and tweaks to detect more +types of errors, as it's only picking two types +or syntax errors right now. However, it seems +these two are the most common errors that you +would be likely to come across. + +----------------------------------- + + +parseCSV 0.3.2 +----------------------------------- +Date: 1-Apr-2008 + +This is primarily a bug-fix release for a critical +bug which was brought to my attention. + +- Fixed a critical bug in conditions parsing which + would generate corrupt matching patterns causing + the condition(s) to not work at all in some + situations. + +- Fixed a small code error which would cause PHP to + generate a invalid offset notice when zero length + values were fed into the unparse() method to + generate CSV data from an array. + +Notice: If you have been using the "parsecsv-stable" +branch as an external in any of your projects, +please use the "stable/parsecsv" branch from this +point on as I will eventually remove the former due +to it's stupid naming. + +----------------------------------- + + +parseCSV 0.3.1 +----------------------------------- +Date: 1-Sep-2007 + +- Small change to default output settings to + conform with RFC 4180 (http://rfc.net/rfc4180.html). + Only the LF (line feed) character was used + by default to separate rows, rather than + CRLF (carriage return & line feed). + +----------------------------------- + + +parseCSV 0.3.0 +----------------------------------- +Date: 9-Aug-2007 + +- Changed to the MIT license. + +- Added offset and limit options. + +- Added SQL-like conditions for quickly + filtering out entries. Documentation on the + condition syntax is forthcoming. 
+ +- Small parsing modification to comply + with some recent changes to the specifications + outlined on Wikipedia's Comma-separated values + article. + +- Minor changes and optimizations, and a few + spelling corrections. Oops :) + +- Included more complex code examples in the + parseCSV download. + +----------------------------------- + + +parseCSV 0.2.1 +----------------------------------- +Date: 8-Aug-2007 + +- Fixed stupid code which caused auto function + to not work in some situations. + +----------------------------------- + + +parseCSV 0.2.0 beta +----------------------------------- +Date: 2-Jan-2007 + +- Added auto() function to automatically detect + delimiter character. + Useful for user upload in case delimiter is + comma (,), tab, or semi-colon (;). Some + versions of MS Excel for Windows use + semi-colons instead of commas when saving to + CSV files. + It uses a process of elimination to eliminate + characters that can not be the delimiter, + so it should work on all CSV-structured files + almost no matter what the delimiter is. + +- Generally updated some of the core workings + to increase performance, and offer better + support for large (1MB and up) files. + +- Added code examples to header comment. + +----------------------------------- + + +parseCSV 0.1.6 beta +----------------------------------- +Date: 22-Dec-2006 + +- Updated output() function. + +----------------------------------- + + +parseCSV 0.1.5 beta +----------------------------------- +Date: 22-Dec-2006 + +- Added output() function for easy output to + browser, for downloading features for example. + +----------------------------------- + + +parseCSV 0.1.4 beta +----------------------------------- +Date: 17-Dec-2006 + +- Minor changes and fixes + +----------------------------------- + + +parseCSV 0.1.3 beta +----------------------------------- +Date: 17-Dec-2006 + +- Added GPL v2.0 license. 
+ +----------------------------------- + + +parseCSV 0.1.2 beta +----------------------------------- +Date: 17-Dec-2006 + +- Added encoding() function for easier character + encoding configuration. + +----------------------------------- + + +parseCSV 0.1.1 beta +----------------------------------- +Date: 24-Nov-2006 + +- Added support for a PHP die command on first + line of csv files if they have a .php extension + to protect secure data from being displayed + directly to the browser. + +----------------------------------- + + +parseCSV 0.1 beta +----------------------------------- +Date: 23-Nov-2006 + +- Initial release + +----------------------------------- From 03bc946b988049dfa7c510c404bc3afc88ed7a4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Bl=C3=A4ul?= Date: Wed, 7 Mar 2018 09:45:48 +0100 Subject: [PATCH 30/34] Sorted uses alphabetically, removed version The repo is no longer on version 1.0.0. The ChangeLog.txt file is a more obvious place for people to see which version includes what. 
--- src/Csv.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index df148a5..9a77efd 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -2,14 +2,13 @@ namespace ParseCsv; -use ParseCsv\enums\SortEnum; use ParseCsv\enums\FileProcessingModeEnum; +use ParseCsv\enums\SortEnum; use ParseCsv\extensions\DatatypeTrait; class Csv { /* - Class: ParseCSV 1.0.0 https://github.com/parsecsv/parsecsv-for-php Fully conforms to the specifications lined out on Wikipedia: From 086cd15b4446cd0bb094dcf1d5159bebd2949207 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Bl=C3=A4ul?= Date: Wed, 7 Mar 2018 09:51:50 +0100 Subject: [PATCH 31/34] Added requires to keep Composer-free environments working --- parsecsv.lib.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/parsecsv.lib.php b/parsecsv.lib.php index 902903b..265ff22 100644 --- a/parsecsv.lib.php +++ b/parsecsv.lib.php @@ -6,6 +6,10 @@ // Check if people used Composer to include this project in theirs if (!file_exists(__DIR__ . '/vendor/autoload.php')) { + require __DIR__ . '/src/enums/AbstractEnum.php'; + require __DIR__ . '/src/enums/DatatypeEnum.php'; + require __DIR__ . '/src/enums/FileProcessingModeEnum.php'; + require __DIR__ . '/src/enums/SortEnum.php'; require __DIR__ . '/src/extensions/DatatypeTrait.php'; require __DIR__ . 
'/src/Csv.php'; } else { From b7f2075efc630cb1a48718497b48573178f93810 Mon Sep 17 00:00:00 2001 From: susgo Date: Sun, 11 Mar 2018 11:14:53 +0100 Subject: [PATCH 32/34] Fix #41: output order and subset (#126) * init unparse tests for ordering and subseting by fields * added one test for heading=false * implements functionality of this issue --- src/Csv.php | 42 ++++++++++++++++++++++-- tests/methods/UnparseTest.php | 62 +++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 tests/methods/UnparseTest.php diff --git a/src/Csv.php b/src/Csv.php index 9a77efd..212f10e 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -793,8 +793,15 @@ class Csv { $entry = array(); // create heading + $fieldOrder = $this->_validate_fields_for_unparse($fields); + if (!$fieldOrder && !empty($data)) { + $column_count = count($data[0]); + $columns = range(0, $column_count - 1, 1); + $fieldOrder = array_combine($columns, $columns); + } + if ($this->heading && !$append && !empty($fields)) { - foreach ($fields as $key => $column_name) { + foreach ($fieldOrder as $column_name) { $entry[] = $this->_enclose_value($column_name, $delimiter); } @@ -804,7 +811,8 @@ class Csv { // create data foreach ($data as $key => $row) { - foreach ($row as $cell_value) { + foreach (array_keys($fieldOrder) as $index){ + $cell_value = $row[$index]; $entry[] = $this->_enclose_value($cell_value, $delimiter); } @@ -819,6 +827,36 @@ class Csv { return $string; } + private function _validate_fields_for_unparse($fields){ + if (empty($fields)){ + return []; + } + + // both are identical, also in ordering + if (array_values($fields) === array_values($this->titles)){ + return array_combine($fields, $fields); + } + + // if renaming given by: $oldName => $newName (maybe with reorder and / or subset): + // todo: this will only work if titles are unique + $fieldOrder = array_intersect(array_flip($fields), $this->titles); + if (!empty($fieldOrder)) { + return 
array_flip($fieldOrder); + } + + $fieldOrder = array_intersect($fields, $this->titles); + if (!empty($fieldOrder)) { + return array_combine($fieldOrder, $fieldOrder); + } + + // original titles are not given in fields. that is okay if count is okay. + if (count($fields) != count($this->titles)) { + throw new \UnexpectedValueException('The specified fields do not match any titles and do not match column count.'); + } + + return array_combine($this->titles, $fields); + } + /** * Load local file or string * diff --git a/tests/methods/UnparseTest.php b/tests/methods/UnparseTest.php new file mode 100644 index 0000000..dbc1f75 --- /dev/null +++ b/tests/methods/UnparseTest.php @@ -0,0 +1,62 @@ +csv = new Csv(); + $this->csv->auto(__DIR__ . '/fixtures/auto-double-enclosure.csv'); + } + + public function testUnparseDefault() { + $expected = "column1,column2\rvalue1,value2\rvalue3,value4\r"; + $this->unparseAndCompare($expected); + } + + public function testUnparseDefaultWithoutHeading(){ + $this->csv->heading = false; + $this->csv->auto(__DIR__ . 
'/fixtures/auto-double-enclosure.csv'); + $expected = "column1,column2\rvalue1,value2\rvalue3,value4\r"; + $this->unparseAndCompare($expected); + + } + + public function testUnparseRenameFields() { + $expected = "C1,C2\rvalue1,value2\rvalue3,value4\r"; + $this->unparseAndCompare($expected, array("C1", "C2")); + } + + public function testReorderFields() { + $expected = "column2,column1\rvalue2,value1\rvalue4,value3\r"; + $this->unparseAndCompare($expected, array("column2", "column1")); + } + + public function testSubsetFields() { + $expected = "column1\rvalue1\rvalue3\r"; + $this->unparseAndCompare($expected, array("column1")); + } + + public function testReorderAndRenameFields() { + $fields = array( + 'column2' => 'C2', + 'column1' => 'C1', + ); + $expected = "C2,C1\rvalue2,value1\rvalue4,value3\r"; + $this->unparseAndCompare($expected, $fields); + } + + private function unparseAndCompare($expected, $fields = array()) { + $str = $this->csv->unparse($this->csv->data, $fields); + $this->assertEquals($expected, $str); + } + +} From 5bc6d09b5e545309e7d573902daffd1a6f6f0acc Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Sun, 11 Mar 2018 12:49:24 +0100 Subject: [PATCH 33/34] fixes bug on _validate_fields_for_unparse() if titles property is used instead of fields parameter for changing the titles for unparsing --- src/Csv.php | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index 212f10e..f0daee0 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -828,33 +828,39 @@ class Csv { } private function _validate_fields_for_unparse($fields){ + // this is needed because sometime titles property is overwritten instead of using fields parameter! + $titlesOnParse = !empty($this->data) ? 
array_keys($this->data[0]) : array(); if (empty($fields)){ - return []; + $fields = $this->titles; + } + + if (empty($fields)){ + return array(); } // both are identical, also in ordering - if (array_values($fields) === array_values($this->titles)){ + if (array_values($fields) === array_values($titlesOnParse)){ return array_combine($fields, $fields); } // if renaming given by: $oldName => $newName (maybe with reorder and / or subset): // todo: this will only work if titles are unique - $fieldOrder = array_intersect(array_flip($fields), $this->titles); + $fieldOrder = array_intersect(array_flip($fields), $titlesOnParse); if (!empty($fieldOrder)) { return array_flip($fieldOrder); } - $fieldOrder = array_intersect($fields, $this->titles); + $fieldOrder = array_intersect($fields, $titlesOnParse); if (!empty($fieldOrder)) { return array_combine($fieldOrder, $fieldOrder); } // original titles are not given in fields. that is okay if count is okay. - if (count($fields) != count($this->titles)) { + if (count($fields) != count($titlesOnParse)) { throw new \UnexpectedValueException('The specified fields do not match any titles and do not match column count.'); } - return array_combine($this->titles, $fields); + return array_combine($titlesOnParse, $fields); } /** From e4c9fed6cf861ed84860fd101778ee0bf4b33f5d Mon Sep 17 00:00:00 2001 From: susgo Date: Mon, 12 Mar 2018 08:28:25 +0100 Subject: [PATCH 34/34] Update README.md for old and new functions (#129) * Update README.md --- README.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/README.md b/README.md index dbf36d2..3a62600 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,40 @@ $csv->auto('data.csv'); print_r($csv->data); ``` +**Parse data with offset** +* ignoring the first X (e.g. 
two) rows +```php +$csv = new ParseCsv\Csv(); +$csv->offset = 2; +$csv->parse('data.csv'); +print_r($csv->data); +``` + +**Limit the number of returned data rows** +```php +$csv = new ParseCsv\Csv(); +$csv->limit = 5; +$csv->parse('data.csv'); +print_r($csv->data); +``` + +**Get total number of data rows without parsing whole data** +* Excluding heading line if present (see $csv->heading property) +```php +$csv = new ParseCsv\Csv(); +$csv->load_data('data.csv'); +$count = $csv->getTotalRowCount(); +print_r($count); +``` + +**Get most common data type for each column (Requires PHP >= 5.5)** + +```php +$csv = new ParseCsv\Csv('data.csv'); +$csv->getDatatypes(); +print_r($csv->data_types); +``` + **Modify data in a CSV file** ```php