From c9cc9697efdf0c0930486ec298f2c7008b861273 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Sat, 24 Feb 2018 16:55:45 +0100 Subject: [PATCH 1/7] new feature: getting total data row count without parsing all data --- src/Csv.php | 35 +++++++++++++++++++++++++++++++++++ tests/methods/ParseTest.php | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/src/Csv.php b/src/Csv.php index 74381e4..bed8292 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -540,6 +540,41 @@ class Csv { return $this->delimiter; } + /** + * Get total number of rows in csv without parsing whole data. + * + * @return bool|int + */ + public function getTotalRowCount(){ + if (empty($this->file_data)){ + return false; + } + + $this->_detect_and_remove_sep_row_from_data($this->file_data); + + $pattern = sprintf('/("[^%s]*")|[^%s]*/i',$this->enclosure, $this->enclosure); + preg_match_all($pattern,$this->file_data, $matches); + + foreach ($matches[0] as $match){ + if (empty($match) || !preg_match("/{$this->enclosure}/", $match)){ + continue; + } + + $replace = str_replace(["\r", "\n"], '', $match); + $this->file_data = str_replace($match, $replace, $this->file_data); + } + + $headingRow = $this->heading ? 1 : 0; + + $count = substr_count($this->file_data, "\r") + + substr_count($this->file_data, "\n") + - substr_count($this->file_data, "\r\n") + - $headingRow; + + + return $count; + } + // ============================================== // ----- [ Core Functions ] --------------------- // ============================================== diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index 9566499..c81876a 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -198,4 +198,41 @@ class ParseTest extends TestCase $this->assertArrayHasKey('column1', $csv->data[0], 'Data parsed incorrectly with enclosure ' . $enclosure); $this->assertEquals('value1', $csv->data[0]['column1'], 'Data parsed incorrectly with enclosure ' . $enclosure); } + + public function countRowsProvider(){ + return [ + 'auto-double-enclosure' => [ + 'auto-double-enclosure.csv', + 2 + ], + 'auto-single-enclosure' => [ + 'auto-single-enclosure.csv', + 2 + ], + 'UTF-8_sep_row' => [ + 'datatype.csv', + 3 + ] + ]; + } + + /** + * @dataProvider countRowsProvider + * + * @param string $file + * @param int $expectedRows + */ + public function testGetTotalRowCountFromFile($file, $expectedRows){ + $this->csv->heading = true; + $this->csv->load_data(__DIR__ . '/fixtures/' . $file); + $this->assertEquals($expectedRows, $this->csv->getTotalRowCount()); + } + + public function testGetTotalRowCountMissingEndingLineBreak(){ + $this->csv->heading = false; + $this->csv->enclosure = '"'; + $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; + $this->csv->load_data($sInput); + $this->assertEquals(3, $this->csv->getTotalRowCount()); + } } From b6247c367c8142381208b117f5daf83f410d5194 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:31:46 +0100 Subject: [PATCH 2/7] reformat code; only extended comment for new function --- src/Csv.php | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index bed8292..5a07658 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -1,4 +1,5 @@ file_data)){ + public function getTotalRowCount() { + if (empty($this->file_data)) { return false; } $this->_detect_and_remove_sep_row_from_data($this->file_data); - $pattern = sprintf('/("[^%s]*")|[^%s]*/i',$this->enclosure, $this->enclosure); - preg_match_all($pattern,$this->file_data, $matches); + $pattern = sprintf('/("[^%s]*")|[^%s]*/i', $this->enclosure, $this->enclosure); + preg_match_all($pattern, $this->file_data, $matches); - foreach ($matches[0] as $match){ - if (empty($match) || !preg_match("/{$this->enclosure}/", $match)){ + foreach ($matches[0] as $match) { + if (empty($match) || !preg_match("/{$this->enclosure}/", $match)) { continue; } @@ -950,12 +952,19 @@ class Csv { */ protected function _validate_row_condition($row, $condition) { $operators = array( - '=', 'equals', 'is', - '!=', 'is not', - '<', 'is less than', - '>', 'is greater than', - '<=', 'is less than or equals', - '>=', 'is greater than or equals', + '=', + 'equals', + 'is', + '!=', + 'is not', + '<', + 'is less than', + '>', + 'is greater than', + '<=', + 'is less than or equals', + '>=', + 'is greater than or equals', 'contains', 'does not contain', ); From 9e5c97328d88fd6d8f80f44a7ddb779dd79f767b Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:33:09 +0100 Subject: [PATCH 3/7] renamed function to getTotalDataRowCount --- src/Csv.php | 2 +- tests/methods/ParseTest.php | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index 5a07658..49ba372 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -547,7 +547,7 @@ class Csv { * * @return bool|int */ - public function getTotalRowCount() { + public function getTotalDataRowCount() { if (empty($this->file_data)) { return false; } diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index c81876a..5851438 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -225,7 +225,7 @@ class ParseTest extends TestCase public function testGetTotalRowCountFromFile($file, $expectedRows){ $this->csv->heading = true; $this->csv->load_data(__DIR__ . '/fixtures/' . $file); - $this->assertEquals($expectedRows, $this->csv->getTotalRowCount()); + $this->assertEquals($expectedRows, $this->csv->getTotalDataRowCount()); } public function testGetTotalRowCountMissingEndingLineBreak(){ @@ -233,6 +233,6 @@ class ParseTest extends TestCase $this->csv->enclosure = '"'; $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; $this->csv->load_data($sInput); - $this->assertEquals(3, $this->csv->getTotalRowCount()); + $this->assertEquals(3, $this->csv->getTotalDataRowCount()); } } From e5eccf1fc1d26e4699128776671fa6fab9786c12 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:38:08 +0100 Subject: [PATCH 4/7] put tests into new file --- tests/methods/DataRowCountTest.php | 66 ++++++++++++++++++++++++++++++ tests/methods/ParseTest.php | 37 ----------------- 2 files changed, 66 insertions(+), 37 deletions(-) create mode 100644 tests/methods/DataRowCountTest.php diff --git a/tests/methods/DataRowCountTest.php b/tests/methods/DataRowCountTest.php new file mode 100644 index 0000000..a1fb0e2 --- /dev/null +++ b/tests/methods/DataRowCountTest.php @@ -0,0 +1,66 @@ +csv = new Csv(); + } + + public function countRowsProvider() { + return [ + 'auto-double-enclosure' => [ + 'auto-double-enclosure.csv', + 2, + ], + 'auto-single-enclosure' => [ + 'auto-single-enclosure.csv', + 2, + ], + 'UTF-8_sep_row' => [ + 'datatype.csv', + 3, + ], + ]; + } + + /** + * @dataProvider countRowsProvider + * + * @param string $file + * @param int $expectedRows + */ + public function testGetTotalRowCountFromFile($file, $expectedRows) { + $this->csv->heading = true; + $this->csv->load_data(__DIR__ . '/fixtures/' . $file); + $this->assertEquals($expectedRows, $this->csv->getTotalDataRowCount()); + } + + public function testGetTotalRowCountMissingEndingLineBreak() { + $this->csv->heading = false; + $this->csv->enclosure = '"'; + $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; + $this->csv->load_data($sInput); + $this->assertEquals(3, $this->csv->getTotalDataRowCount()); + } +} diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index 5851438..9566499 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -198,41 +198,4 @@ class ParseTest extends TestCase $this->assertArrayHasKey('column1', $csv->data[0], 'Data parsed incorrectly with enclosure ' . $enclosure); $this->assertEquals('value1', $csv->data[0]['column1'], 'Data parsed incorrectly with enclosure ' . $enclosure); } - - public function countRowsProvider(){ - return [ - 'auto-double-enclosure' => [ - 'auto-double-enclosure.csv', - 2 - ], - 'auto-single-enclosure' => [ - 'auto-single-enclosure.csv', - 2 - ], - 'UTF-8_sep_row' => [ - 'datatype.csv', - 3 - ] - ]; - } - - /** - * @dataProvider countRowsProvider - * - * @param string $file - * @param int $expectedRows - */ - public function testGetTotalRowCountFromFile($file, $expectedRows){ - $this->csv->heading = true; - $this->csv->load_data(__DIR__ . '/fixtures/' . $file); - $this->assertEquals($expectedRows, $this->csv->getTotalDataRowCount()); - } - - public function testGetTotalRowCountMissingEndingLineBreak(){ - $this->csv->heading = false; - $this->csv->enclosure = '"'; - $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; - $this->csv->load_data($sInput); - $this->assertEquals(3, $this->csv->getTotalDataRowCount()); - } } From 611b1a92e85ddf27cc3186bd1763755996fdf892 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:46:41 +0100 Subject: [PATCH 5/7] use strpos instead of preg_match --- src/Csv.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Csv.php b/src/Csv.php index 49ba372..b4b2e1d 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -558,7 +558,7 @@ class Csv { preg_match_all($pattern, $this->file_data, $matches); foreach ($matches[0] as $match) { - if (empty($match) || !preg_match("/{$this->enclosure}/", $match)) { + if (empty($match) || (strpos($match, $this->enclosure) === false)) { continue; } From aaefe2a480b853aab4fdb3a909ca2d555350610a Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:50:35 +0100 Subject: [PATCH 6/7] introduces new local variable that holds the data --- src/Csv.php | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Csv.php b/src/Csv.php index b4b2e1d..297f463 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -552,10 +552,12 @@ class Csv { return false; } - $this->_detect_and_remove_sep_row_from_data($this->file_data); + $data = $this->file_data; + + $this->_detect_and_remove_sep_row_from_data($data); $pattern = sprintf('/("[^%s]*")|[^%s]*/i', $this->enclosure, $this->enclosure); - preg_match_all($pattern, $this->file_data, $matches); + preg_match_all($pattern, $data, $matches); foreach ($matches[0] as $match) { if (empty($match) || (strpos($match, $this->enclosure) === false)) { @@ -563,14 +565,14 @@ class Csv { } $replace = str_replace(["\r", "\n"], '', $match); - $this->file_data = str_replace($match, $replace, $this->file_data); + $data = str_replace($match, $replace, $data); } $headingRow = $this->heading ? 1 : 0; - $count = substr_count($this->file_data, "\r") - + substr_count($this->file_data, "\n") - - substr_count($this->file_data, "\r\n") + $count = substr_count($data, "\r") + + substr_count($data, "\n") + - substr_count($data, "\r\n") - $headingRow; From 68b849a37bb5d8bf7f54a3e35b9631b28b66b41a Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Mon, 26 Feb 2018 08:55:51 +0100 Subject: [PATCH 7/7] corrected regex to fit all given enclosures. Added test for single enclosure --- src/Csv.php | 2 +- tests/methods/DataRowCountTest.php | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Csv.php b/src/Csv.php index 297f463..5f98d79 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -556,7 +556,7 @@ class Csv { $this->_detect_and_remove_sep_row_from_data($data); - $pattern = sprintf('/("[^%s]*")|[^%s]*/i', $this->enclosure, $this->enclosure); + $pattern = sprintf('/(%1$s[^%1$s]*%1$s)/i', $this->enclosure); preg_match_all($pattern, $data, $matches); foreach ($matches[0] as $match) { diff --git a/tests/methods/DataRowCountTest.php b/tests/methods/DataRowCountTest.php index a1fb0e2..693d736 100644 --- a/tests/methods/DataRowCountTest.php +++ b/tests/methods/DataRowCountTest.php @@ -63,4 +63,14 @@ class DataRowCountTest extends TestCase { $this->csv->load_data($sInput); $this->assertEquals(3, $this->csv->getTotalDataRowCount()); } + + + public function testGetTotalRowCountSingleEnclosure() { + $this->csv->heading = false; + $this->csv->enclosure = "'"; + $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\'c\r\nd\'"; + $this->csv->load_data($sInput); + $this->assertEquals(3, $this->csv->getTotalDataRowCount()); + } + }