diff --git a/ChangeLog.txt b/ChangeLog.txt index b552a65..d592923 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -1,264 +1,282 @@ -ParseCSV 1.0.0-rc.2 ------------------------------------ -Date: unreleased - -- Renamed class from parseCSV to Csv and added name- - space "ParseCsv" for PSR compliance. - -- Added support for MS Excel's "sep=" to detect the - delimiter (Issue #60). - -- Added support for mb_convert_encoding() instead of - iconv() - see issue #109 - -- A number of minor bug fixes - see GitHub issues - ------------------------------------ - - -parseCSV 0.4.3 beta ------------------------------------ -Date: 1-July-2008 - -- Issue #4. Added an option for setting sorting - type behavior when sorting data. - Simply set $csv->sort_type to "regular", "numeric", - or "string". - -- Issue #6. Raw loaded file data is now cleared from - file_data property when it has been successfully - parsed to keep parseCSV's memory footprint to a - minimum. Specifically handy when using multiple - instances of parseCSV to process large files. - ------------------------------------ - - -parseCSV 0.4.2 beta ------------------------------------ -Date: 31-May-2008 - -- IMPORTANT! If you're using the output(), - method please note that the first parameter - has been completely removed as it was - technically just useless. Instead, the second - parameter (filename) doubles as its replacement. - Simply put, if filename is not set or null, the - output() method will not output a downloadable - file. Please update your existing code - when using 0.4.2 and later :) - -- Small fix to the headers sent by the output() - method. - -- Added a download example using the output() - method to the examples folder. - ------------------------------------ - - -parseCSV 0.4.1 beta ------------------------------------ -Date: 29-May-2008 - -- Fixed a small bug in how the output() method - handles input data. 
- ------------------------------------ - - -parseCSV 0.4 beta ------------------------------------ -Date: 11-Apr-2008 - -- Error reporting for files/data which is corrupt - or has formatting errors like using double - quotes in a field without enclosing quotes. Or - not escaping double quotes with a second one. - -- parse() method does not require input anymore - if the "$object->file" property has been set. - -I'm calling this a beta release due to the heavy -modifications to the core parsing logic required -for error reporting to work. I have tested the -new code quite extensively, I'm fairly confident -that it still parses exactly as it always has. - -The second reason I'm calling it a beta release -is cause I'm sure the error reporting code will -need more refinements and tweaks to detect more -types of errors, as it's only picking two types -or syntax errors right now. However, it seems -these two are the most common errors that you -would be likely to come across. - ------------------------------------ - - -parseCSV 0.3.2 ------------------------------------ -Date: 1-Apr-2008 - -This is primarily a bug-fix release for a critical -bug which was brought to my attention. - -- Fixed a critical bug in conditions parsing which - would generate corrupt matching patterns causing - the condition(s) to not work at all in some - situations. - -- Fixed a small code error which would cause PHP to - generate a invalid offset notice when zero length - values were fed into the unparse() method to - generate CSV data from an array. - -Notice: If you have been using the "parsecsv-stable" -branch as an external in any of your projects, -please use the "stable/parsecsv" branch from this -point on as I will eventually remove the former due -to it's stupid naming. - ------------------------------------ - - -parseCSV 0.3.1 ------------------------------------ -Date: 1-Sep-2007 - -- Small change to default output settings to - conform with RFC 4180 (http://rfc.net/rfc4180.html). 
- Only the LF (line feed) character was used - by default to separate rows, rather than - CRLF (carriage return & line feed). - ------------------------------------ - - -parseCSV 0.3.0 ------------------------------------ -Date: 9-Aug-2007 - -- Changed to the MIT license. - -- Added offset and limit options. - -- Added SQL-like conditions for quickly - filtering out entries. Documentation on the - condition syntax is forthcoming. - -- Small parsing modification to comply - with some recent changes to the specifications - outlined on Wikipedia's Comma-separated values - article. - -- Minor changes and optimizations, and a few - spelling corrections. Oops :) - -- Included more complex code examples in the - parseCSV download. - ------------------------------------ - - -parseCSV 0.2.1 ------------------------------------ -Date: 8-Aug-2007 - -- Fixed stupid code which caused auto function - to not work in some situations. - ------------------------------------ - - -parseCSV 0.2.0 beta ------------------------------------ -Date: 2-Jan-2007 - -- Added auto() function to automatically detect - delimiter character. - Useful for user upload in case delimiter is - comma (,), tab, or semi-colon (;). Some - versions of MS Excel for Windows use - semi-colons instead of commas when saving to - CSV files. - It uses a process of elimination to eliminate - characters that can not be the delimiter, - so it should work on all CSV-structured files - almost no matter what the delimiter is. - -- Generally updated some of the core workings - to increase performance, and offer better - support for large (1MB and up) files. - -- Added code examples to header comment. - ------------------------------------ - - -parseCSV 0.1.6 beta ------------------------------------ -Date: 22-Dec-2006 - -- Updated output() function. 
- ------------------------------------ - - -parseCSV 0.1.5 beta ------------------------------------ -Date: 22-Dec-2006 - -- Added output() function for easy output to - browser, for downloading features for example. - ------------------------------------ - - -parseCSV 0.1.4 beta ------------------------------------ -Date: 17-Dec-2006 - -- Minor changes and fixes - ------------------------------------ - - -parseCSV 0.1.3 beta ------------------------------------ -Date: 17-Dec-2006 - -- Added GPL v2.0 license. - ------------------------------------ - - -parseCSV 0.1.2 beta ------------------------------------ -Date: 17-Dec-2006 - -- Added encoding() function for easier character - encoding configuration. - ------------------------------------ - - -parseCSV 0.1.1 beta ------------------------------------ -Date: 24-Nov-2006 - -- Added support for a PHP die command on first - line of csv files if they have a .php extension - to protect secure data from being displayed - directly to the browser. - ------------------------------------ - - -parseCSV 0.1 beta ------------------------------------ -Date: 23-Nov-2006 - -- Initial release - ------------------------------------ +ParseCSV dev-master +----------------------------------- +Date: unreleased + +- New function getTotalDataRowCount() - useful if + $limit is set - see pull request #122. + +----------------------------------- + + +ParseCSV 1.0.0 +----------------------------------- +Date: 3-March-2018 + +- Renamed class from parseCSV to Csv and added name- + space "ParseCsv" for PSR compliance. + +- Added support for MS Excel's "sep=" to detect the + delimiter (Issue #60). + +- Added data type detection - function getDatatypes() + guesses the type of each column. + +- MIME: output() sends correct MIME type to browser + if the separator is a tab tab (Issue #79). + +- Added support for mb_convert_encoding() instead of + iconv() - see issue #109. + +- A number of minor bug fixes - see GitHub issues. 
+ +- Added many more unit tests. + +----------------------------------- + + +parseCSV 0.4.3 beta +----------------------------------- +Date: 1-July-2008 + +- Issue #4. Added an option for setting sorting + type behavior when sorting data. + Simply set $csv->sort_type to "regular", "numeric", + or "string". + +- Issue #6. Raw loaded file data is now cleared from + file_data property when it has been successfully + parsed to keep parseCSV's memory footprint to a + minimum. Specifically handy when using multiple + instances of parseCSV to process large files. + +----------------------------------- + + +parseCSV 0.4.2 beta +----------------------------------- +Date: 31-May-2008 + +- IMPORTANT! If you're using the output(), + method please note that the first parameter + has been completely removed as it was + technically just useless. Instead, the second + parameter (filename) doubles as its replacement. + Simply put, if filename is not set or null, the + output() method will not output a downloadable + file. Please update your existing code + when using 0.4.2 and later :) + +- Small fix to the headers sent by the output() + method. + +- Added a download example using the output() + method to the examples folder. + +----------------------------------- + + +parseCSV 0.4.1 beta +----------------------------------- +Date: 29-May-2008 + +- Fixed a small bug in how the output() method + handles input data. + +----------------------------------- + + +parseCSV 0.4 beta +----------------------------------- +Date: 11-Apr-2008 + +- Error reporting for files/data which is corrupt + or has formatting errors like using double + quotes in a field without enclosing quotes. Or + not escaping double quotes with a second one. + +- parse() method does not require input anymore + if the "$object->file" property has been set. + +I'm calling this a beta release due to the heavy +modifications to the core parsing logic required +for error reporting to work. 
I have tested the +new code quite extensively, I'm fairly confident +that it still parses exactly as it always has. + +The second reason I'm calling it a beta release +is cause I'm sure the error reporting code will +need more refinements and tweaks to detect more +types of errors, as it's only picking two types +or syntax errors right now. However, it seems +these two are the most common errors that you +would be likely to come across. + +----------------------------------- + + +parseCSV 0.3.2 +----------------------------------- +Date: 1-Apr-2008 + +This is primarily a bug-fix release for a critical +bug which was brought to my attention. + +- Fixed a critical bug in conditions parsing which + would generate corrupt matching patterns causing + the condition(s) to not work at all in some + situations. + +- Fixed a small code error which would cause PHP to + generate a invalid offset notice when zero length + values were fed into the unparse() method to + generate CSV data from an array. + +Notice: If you have been using the "parsecsv-stable" +branch as an external in any of your projects, +please use the "stable/parsecsv" branch from this +point on as I will eventually remove the former due +to it's stupid naming. + +----------------------------------- + + +parseCSV 0.3.1 +----------------------------------- +Date: 1-Sep-2007 + +- Small change to default output settings to + conform with RFC 4180 (http://rfc.net/rfc4180.html). + Only the LF (line feed) character was used + by default to separate rows, rather than + CRLF (carriage return & line feed). + +----------------------------------- + + +parseCSV 0.3.0 +----------------------------------- +Date: 9-Aug-2007 + +- Changed to the MIT license. + +- Added offset and limit options. + +- Added SQL-like conditions for quickly + filtering out entries. Documentation on the + condition syntax is forthcoming. 
+ +- Small parsing modification to comply + with some recent changes to the specifications + outlined on Wikipedia's Comma-separated values + article. + +- Minor changes and optimizations, and a few + spelling corrections. Oops :) + +- Included more complex code examples in the + parseCSV download. + +----------------------------------- + + +parseCSV 0.2.1 +----------------------------------- +Date: 8-Aug-2007 + +- Fixed stupid code which caused auto function + to not work in some situations. + +----------------------------------- + + +parseCSV 0.2.0 beta +----------------------------------- +Date: 2-Jan-2007 + +- Added auto() function to automatically detect + delimiter character. + Useful for user upload in case delimiter is + comma (,), tab, or semi-colon (;). Some + versions of MS Excel for Windows use + semi-colons instead of commas when saving to + CSV files. + It uses a process of elimination to eliminate + characters that can not be the delimiter, + so it should work on all CSV-structured files + almost no matter what the delimiter is. + +- Generally updated some of the core workings + to increase performance, and offer better + support for large (1MB and up) files. + +- Added code examples to header comment. + +----------------------------------- + + +parseCSV 0.1.6 beta +----------------------------------- +Date: 22-Dec-2006 + +- Updated output() function. + +----------------------------------- + + +parseCSV 0.1.5 beta +----------------------------------- +Date: 22-Dec-2006 + +- Added output() function for easy output to + browser, for downloading features for example. + +----------------------------------- + + +parseCSV 0.1.4 beta +----------------------------------- +Date: 17-Dec-2006 + +- Minor changes and fixes + +----------------------------------- + + +parseCSV 0.1.3 beta +----------------------------------- +Date: 17-Dec-2006 + +- Added GPL v2.0 license. 
+ +----------------------------------- + + +parseCSV 0.1.2 beta +----------------------------------- +Date: 17-Dec-2006 + +- Added encoding() function for easier character + encoding configuration. + +----------------------------------- + + +parseCSV 0.1.1 beta +----------------------------------- +Date: 24-Nov-2006 + +- Added support for a PHP die command on first + line of csv files if they have a .php extension + to protect secure data from being displayed + directly to the browser. + +----------------------------------- + + +parseCSV 0.1 beta +----------------------------------- +Date: 23-Nov-2006 + +- Initial release + +----------------------------------- diff --git a/README.md b/README.md index f2960f6..3a62600 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,23 @@ and third-party support for handling CSV data in PHP. [csv]: http://en.wikipedia.org/wiki/Comma-separated_values +## Features + +* ParseCsv is a complete and fully featured CSV solution for PHP +* Supports enclosed values, enclosed commas, double quotes and new lines. +* Automatic delimiter character detection. +* Sort data by specific fields/columns. +* Easy data manipulation. +* Basic SQL-like _conditions_, _offset_ and _limit_ options for filtering + data. +* Error detection for incorrectly formatted input. It attempts to be + intelligent, but can not be trusted 100% due to the structure of CSV, and + how different programs like Excel for example outputs CSV data. +* Support for character encoding conversion using PHP's + `iconv()` and `mb_convert_encoding()` functions. +* Supports PHP 5.4 and higher. + It certainly works with PHP 7.2 and all versions in between. + ## Installation Installation is easy using Composer. Just run the following on the @@ -33,23 +50,6 @@ repository or extract the [ZIP](https://github.com/parsecsv/parsecsv-for-php/archive/master.zip). To use ParseCSV, you then have to add a `require 'parsecsv.lib.php';` line. 
-## Features - -* ParseCsv is a complete and fully featured CSV solution for PHP -* Supports enclosed values, enclosed commas, double quotes and new lines. -* Automatic delimiter character detection. -* Sort data by specific fields/columns. -* Easy data manipulation. -* Basic SQL-like _conditions_, _offset_ and _limit_ options for filtering - data. -* Error detection for incorrectly formatted input. It attempts to be - intelligent, but can not be trusted 100% due to the structure of CSV, and - how different programs like Excel for example outputs CSV data. -* Support for character encoding conversion using PHP's - `iconv()` and `mb_convert_encoding()` functions. -* Supports PHP 5.4 and higher. - It certainly works with PHP 7.2 and all versions in between. - ## Example Usage **General** @@ -77,6 +77,40 @@ $csv->auto('data.csv'); print_r($csv->data); ``` +**Parse data with offset** +* ignoring the first X (e.g. two) rows +```php +$csv = new ParseCsv\Csv(); +$csv->offset = 2; +$csv->parse('data.csv'); +print_r($csv->data); +``` + +**Limit the number of returned data rows** +```php +$csv = new ParseCsv\Csv(); +$csv->limit = 5; +$csv->parse('data.csv'); +print_r($csv->data); +``` + +**Get total number of data rows without parsing whole data** +* Excluding heading line if present (see $csv->heading property) +```php +$csv = new ParseCsv\Csv(); +$csv->load_data('data.csv'); +$count = $csv->getTotalDataRowCount(); +print_r($count); +``` + +**Get most common data type for each column (Requires PHP >= 5.5)** + +```php +$csv = new ParseCsv\Csv('data.csv'); +$csv->getDatatypes(); +print_r($csv->data_types); +``` + **Modify data in a CSV file** ```php diff --git a/parsecsv.lib.php b/parsecsv.lib.php index 902903b..265ff22 100644 --- a/parsecsv.lib.php +++ b/parsecsv.lib.php @@ -6,6 +6,10 @@ // Check if people used Composer to include this project in theirs if (!file_exists(__DIR__ . '/vendor/autoload.php')) { + require __DIR__ . '/src/enums/AbstractEnum.php'; + require __DIR__ . 
'/src/enums/DatatypeEnum.php'; + require __DIR__ . '/src/enums/FileProcessingModeEnum.php'; + require __DIR__ . '/src/enums/SortEnum.php'; require __DIR__ . '/src/extensions/DatatypeTrait.php'; require __DIR__ . '/src/Csv.php'; } else { diff --git a/src/Csv.php b/src/Csv.php index da93f40..fdb34ab 100644 --- a/src/Csv.php +++ b/src/Csv.php @@ -2,12 +2,13 @@ namespace ParseCsv; +use ParseCsv\enums\FileProcessingModeEnum; +use ParseCsv\enums\SortEnum; use ParseCsv\extensions\DatatypeTrait; class Csv { /* - Class: ParseCSV 1.0.0-rc.2 https://github.com/parsecsv/parsecsv-for-php Fully conforms to the specifications lined out on Wikipedia: @@ -89,7 +90,7 @@ class Csv { * * @var string|null */ - public $sort_type = null; + public $sort_type = SortEnum::SORT_TYPE_REGULAR; /** * Delimiter @@ -299,12 +300,34 @@ class Csv { * Class constructor * * @param string|null $input The CSV string or a direct filepath - * @param integer|null $offset Number of rows to ignore from the beginning of the data - * @param integer|null $limit Limits the number of returned rows to specified amount - * @param string|null $conditions Basic SQL-like conditions for row matching - * @param null|true $keep_file_data Keep raw file data in memory after successful parsing (useful for debugging) + * @param integer|null $offset Number of rows to ignore from the beginning + * of the data + * @param integer|null $limit Limits the number of returned rows to + * specified amount + * @param string|null $conditions Basic SQL-like conditions for row + * matching + * @param null|true $keep_file_data Keep raw file data in memory after + * successful parsing (useful for debugging) */ public function __construct($input = null, $offset = null, $limit = null, $conditions = null, $keep_file_data = null) { + $this->init($offset, $limit, $conditions, $keep_file_data); + + if (!empty($input)) { + $this->parse($input); + } + } + + /** + * @param integer|null $offset Number of rows to ignore from the beginning + * of the 
data + * @param integer|null $limit Limits the number of returned rows to + * specified amount + * @param string|null $conditions Basic SQL-like conditions for row + * matching + * @param null|true $keep_file_data Keep raw file data in memory after + * successful parsing (useful for debugging) + */ + public function init($offset = null, $limit = null, $conditions = null, $keep_file_data = null) { if (!is_null($offset)) { $this->offset = $offset; } @@ -320,10 +343,6 @@ class Csv { if (!is_null($keep_file_data)) { $this->keep_file_data = $keep_file_data; } - - if (!empty($input)) { - $this->parse($input); - } } // ============================================== @@ -346,32 +365,29 @@ class Csv { $input = $this->file; } - if (!empty($input)) { - if (!is_null($offset)) { - $this->offset = $offset; - } + if (empty($input)) { + // todo: but why true? + return true; + } - if (!is_null($limit)) { - $this->limit = $limit; - } + $this->init($offset, $limit, $conditions); - if (!is_null($conditions)) { - $this->conditions = $conditions; - } - if (strlen($input) <= PHP_MAXPATHLEN && is_readable($input)) { - $this->data = $this->parse_file($input); - } else { - $this->file_data = &$input; - $this->data = $this->parse_string(); - } + if (strlen($input) <= PHP_MAXPATHLEN && is_readable($input)) { + $this->file = $input; + $this->data = $this->parse_file(); + } else { + $this->file = null; + $this->file_data = &$input; + $this->data = $this->parse_string(); + } - if ($this->data === false) { - return false; - } + if ($this->data === false) { + return false; } return true; + } /** @@ -381,16 +397,16 @@ class Csv { * @param string $file File location to save to * @param array $data 2D array of data * @param bool $append Append current data to end of target CSV, if file exists - * @param array $fields Field names + * @param array $fields Field names. Sets the header. If it is not set $this->titles would be used instead. 
* * @return bool */ - public function save($file = '', $data = array(), $append = false, $fields = array()) { + public function save($file = '', $data = array(), $append = FileProcessingModeEnum::MODE_FILE_OVERWRITE, $fields = array()) { if (empty($file)) { $file = &$this->file; } - $mode = $append ? 'ab' : 'wb'; + $mode = FileProcessingModeEnum::getAppendMode($append); $is_php = preg_match('/\.php$/i', $file) ? true : false; return $this->_wfile($file, $this->unparse($data, $fields, $append, $is_php), $mode); @@ -510,6 +526,44 @@ class Csv { return $this->delimiter; } + /** + * Get total number of data rows (exclusive heading line if present) in csv + * without parsing whole data. + * + * @return bool|int + */ + public function getTotalDataRowCount() { + if (empty($this->file_data)) { + return false; + } + + $data = $this->file_data; + + $this->_detect_and_remove_sep_row_from_data($data); + + $pattern = sprintf('/(%1$s[^%1$s]*%1$s)/i', $this->enclosure); + preg_match_all($pattern, $data, $matches); + + foreach ($matches[0] as $match) { + if (empty($match) || (strpos($match, $this->enclosure) === false)) { + continue; + } + + $replace = str_replace(["\r", "\n"], '', $match); + $data = str_replace($match, $replace, $data); + } + + $headingRow = $this->heading ? 
1 : 0; + + $count = substr_count($data, "\r") + + substr_count($data, "\n") + - substr_count($data, "\r\n") + - $headingRow; + + + return $count; + } + // ============================================== // ----- [ Core Functions ] --------------------- // ============================================== @@ -522,7 +576,7 @@ class Csv { * * @return array|bool */ - public function parse_file($file = null) { + protected function parse_file($file = null) { if (is_null($file)) { $file = $this->file; } @@ -545,7 +599,7 @@ class Csv { * * @return array|false - 2D array with CSV data, or false on failure */ - public function parse_string($data = null) { + protected function parse_string($data = null) { if (empty($data)) { if ($this->_check_data()) { $data = &$this->file_data; @@ -696,13 +750,7 @@ class Csv { $this->titles = $head; if (!empty($this->sort_by)) { - $sort_type = SORT_REGULAR; - if ($this->sort_type == 'numeric') { - $sort_type = SORT_NUMERIC; - } elseif ($this->sort_type == 'string') { - $sort_type = SORT_STRING; - } - + $sort_type = SortEnum::getSorting($this->sort_type); $this->sort_reverse ? 
krsort($rows, $sort_type) : ksort($rows, $sort_type); if ($this->offset !== null || $this->limit !== null) { @@ -730,7 +778,7 @@ class Csv { * * @return string CSV data */ - public function unparse($data = array(), $fields = array(), $append = false, $is_php = false, $delimiter = null) { + public function unparse($data = array(), $fields = array(), $append = FileProcessingModeEnum::MODE_FILE_OVERWRITE, $is_php = false, $delimiter = null) { if (!is_array($data) || empty($data)) { $data = &$this->data; } @@ -747,8 +795,15 @@ class Csv { $entry = array(); // create heading + $fieldOrder = $this->_validate_fields_for_unparse($fields); + if (!$fieldOrder && !empty($data)) { + $column_count = count($data[0]); + $columns = range(0, $column_count - 1, 1); + $fieldOrder = array_combine($columns, $columns); + } + if ($this->heading && !$append && !empty($fields)) { - foreach ($fields as $key => $column_name) { + foreach ($fieldOrder as $column_name) { $entry[] = $this->_enclose_value($column_name, $delimiter); } @@ -758,7 +813,8 @@ class Csv { // create data foreach ($data as $key => $row) { - foreach ($row as $cell_value) { + foreach (array_keys($fieldOrder) as $index){ + $cell_value = $row[$index]; $entry[] = $this->_enclose_value($cell_value, $delimiter); } @@ -773,6 +829,42 @@ class Csv { return $string; } + private function _validate_fields_for_unparse($fields){ + // this is needed because sometime titles property is overwritten instead of using fields parameter! + $titlesOnParse = !empty($this->data) ? 
array_keys($this->data[0]) : array(); + if (empty($fields)){ + $fields = $this->titles; + } + + if (empty($fields)){ + return array(); + } + + // both are identical, also in ordering + if (array_values($fields) === array_values($titlesOnParse)){ + return array_combine($fields, $fields); + } + + // if renaming given by: $oldName => $newName (maybe with reorder and / or subset): + // todo: this will only work if titles are unique + $fieldOrder = array_intersect(array_flip($fields), $titlesOnParse); + if (!empty($fieldOrder)) { + return array_flip($fieldOrder); + } + + $fieldOrder = array_intersect($fields, $titlesOnParse); + if (!empty($fieldOrder)) { + return array_combine($fieldOrder, $fieldOrder); + } + + // original titles are not given in fields. that is okay if count is okay. + if (count($fields) != count($titlesOnParse)) { + throw new \UnexpectedValueException('The specified fields do not match any titles and do not match column count.'); + } + + return array_combine($titlesOnParse, $fields); + } + /** * Load local file or string * @@ -885,12 +977,19 @@ class Csv { */ protected function _validate_row_condition($row, $condition) { $operators = array( - '=', 'equals', 'is', - '!=', 'is not', - '<', 'is less than', - '>', 'is greater than', - '<=', 'is less than or equals', - '>=', 'is greater than or equals', + '=', + 'equals', + 'is', + '!=', + 'is not', + '<', + 'is less than', + '>', + 'is greater than', + '<=', + 'is less than or equals', + '>=', + 'is greater than or equals', 'contains', 'does not contain', ); diff --git a/src/enums/AbstractEnum.php b/src/enums/AbstractEnum.php new file mode 100644 index 0000000..aae78e1 --- /dev/null +++ b/src/enums/AbstractEnum.php @@ -0,0 +1,40 @@ +isValid($value)) { + throw new \UnexpectedValueException("Value '$value' is not part of the enum " . 
get_called_class()); + } + $this->value = $value; + } + + public static function getConstants(){ + $class = get_called_class(); + $reflection = new \ReflectionClass($class); + + return $reflection->getConstants(); + } + + /** + * Check if enum value is valid + * + * @param $value + * + * @return bool + */ + public static function isValid($value) + { + return in_array($value, static::getConstants(), true); + } +} diff --git a/src/enums/DatatypeEnum.php b/src/enums/DatatypeEnum.php index 8fea47d..7f490e9 100644 --- a/src/enums/DatatypeEnum.php +++ b/src/enums/DatatypeEnum.php @@ -9,7 +9,7 @@ namespace ParseCsv\enums; * * todo: needs a basic parent enum class for error handling. */ -class DatatypeEnum { +class DatatypeEnum extends AbstractEnum { const __DEFAULT = self::TYPE_STRING; diff --git a/src/enums/FileProcessingModeEnum.php b/src/enums/FileProcessingModeEnum.php new file mode 100644 index 0000000..ab88055 --- /dev/null +++ b/src/enums/FileProcessingModeEnum.php @@ -0,0 +1,28 @@ + SORT_REGULAR, + self::SORT_TYPE_STRING => SORT_STRING, + self::SORT_TYPE_NUMERIC => SORT_NUMERIC + ); + + public static function getSorting($type){ + if (array_key_exists($type, self::$sorting)){ + return self::$sorting[$type]; + } + + return self::$sorting[self::__DEFAULT]; + } + +} diff --git a/src/extensions/DatatypeTrait.php b/src/extensions/DatatypeTrait.php index 3e66405..475fc40 100644 --- a/src/extensions/DatatypeTrait.php +++ b/src/extensions/DatatypeTrait.php @@ -2,6 +2,8 @@ namespace ParseCsv\extensions; +use ParseCsv\enums\DatatypeEnum; + trait DatatypeTrait { /** @@ -47,7 +49,7 @@ trait DatatypeTrait { * * @access public * - * @uses getDatatypeFromString + * @uses DatatypeEnum::getValidTypeFromSample * * @return array|bool */ @@ -62,7 +64,7 @@ trait DatatypeTrait { $result = []; foreach ($this->titles as $cName) { $column = array_column($this->data, $cName); - $cDatatypes = array_map('ParseCsv\enums\DatatypeEnum::getValidTypeFromSample', $column); + $cDatatypes = 
array_map(DatatypeEnum::class . '::getValidTypeFromSample', $column); $result[$cName] = $this->getMostFrequentDatatypeForColumn($cDatatypes); } @@ -71,4 +73,41 @@ trait DatatypeTrait { return !empty($this->data_types) ? $this->data_types : []; } + + /** + * Check data type of titles / first row for auto detecting if this could be + * a heading line. + * + * Requires PHP >= 5.5 + * + * @access public + * + * @uses DatatypeEnum::getValidTypeFromSample + * + * @return bool + */ + public function autoDetectFileHasHeading(){ + if (empty($this->data)){ + throw new \UnexpectedValueException('No data set yet.'); + } + + if ($this->heading){ + $firstRow = $this->titles; + } else { + $firstRow = $this->data[0]; + } + + $firstRow = array_filter($firstRow); + if (empty($firstRow)){ + return false; + } + + $firstRowDatatype = array_map(DatatypeEnum::class . '::getValidTypeFromSample', $firstRow); + + if ($this->getMostFrequentDatatypeForColumn($firstRowDatatype) !== DatatypeEnum::TYPE_STRING){ + return false; + } + + return true; + } } diff --git a/tests/methods/DataRowCountTest.php b/tests/methods/DataRowCountTest.php new file mode 100644 index 0000000..693d736 --- /dev/null +++ b/tests/methods/DataRowCountTest.php @@ -0,0 +1,76 @@ +csv = new Csv(); + } + + public function countRowsProvider() { + return [ + 'auto-double-enclosure' => [ + 'auto-double-enclosure.csv', + 2, + ], + 'auto-single-enclosure' => [ + 'auto-single-enclosure.csv', + 2, + ], + 'UTF-8_sep_row' => [ + 'datatype.csv', + 3, + ], + ]; + } + + /** + * @dataProvider countRowsProvider + * + * @param string $file + * @param int $expectedRows + */ + public function testGetTotalRowCountFromFile($file, $expectedRows) { + $this->csv->heading = true; + $this->csv->load_data(__DIR__ . '/fixtures/' . 
$file); + $this->assertEquals($expectedRows, $this->csv->getTotalDataRowCount()); + } + + public function testGetTotalRowCountMissingEndingLineBreak() { + $this->csv->heading = false; + $this->csv->enclosure = '"'; + $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; + $this->csv->load_data($sInput); + $this->assertEquals(3, $this->csv->getTotalDataRowCount()); + } + + + public function testGetTotalRowCountSingleEnclosure() { + $this->csv->heading = false; + $this->csv->enclosure = "'"; + $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\'c\r\nd\'"; + $this->csv->load_data($sInput); + $this->assertEquals(3, $this->csv->getTotalDataRowCount()); + } + +} diff --git a/tests/methods/ParseTest.php b/tests/methods/ParseTest.php index b7aca58..443bced 100644 --- a/tests/methods/ParseTest.php +++ b/tests/methods/ParseTest.php @@ -85,7 +85,8 @@ class ParseTest extends TestCase { $this->csv->enclosure = '"'; $sInput = "86545235689,a\r\n34365587654,b\r\n13469874576,\"c\r\nd\""; $expected_data = [86545235689, 34365587654, 13469874576]; - $actual_data = $this->csv->parse_string($sInput); + + $actual_data = $this->invokeMethod($this->csv, 'parse_string', array($sInput)); $actual_column = array_map('reset', $actual_data); $this->assertEquals($expected_data, $actual_column); $this->assertEquals([ @@ -153,6 +154,34 @@ class ParseTest extends TestCase { $this->assertEquals($expected, $this->csv->data_types); } + /** + * @depends testSepRowAutoDetection + */ + public function testAutoDetectFileHasHeading(){ + if (!function_exists('array_column')) { + // getDatatypes requires array_column, but that + // function is only available in PHP >= 5.5 + return; + } + + $this->csv->auto(__DIR__ . '/fixtures/datatype.csv'); + $this->assertTrue($this->csv->autoDetectFileHasHeading()); + + $this->csv->heading = false; + $this->csv->auto(__DIR__ . 
'/fixtures/datatype.csv'); + $this->assertTrue($this->csv->autoDetectFileHasHeading()); + + $this->csv->heading = false; + $sInput = "86545235689\r\n34365587654\r\n13469874576"; + $this->csv->auto($sInput); + $this->assertFalse($this->csv->autoDetectFileHasHeading()); + + $this->csv->heading = true; + $sInput = "86545235689\r\n34365587654\r\n13469874576"; + $this->csv->auto($sInput); + $this->assertFalse($this->csv->autoDetectFileHasHeading()); + } + protected function _get_magazines_data() { return [ [ @@ -194,4 +223,22 @@ class ParseTest extends TestCase { $this->assertArrayHasKey('column1', $csv->data[0], 'Data parsed incorrectly with enclosure ' . $enclosure); $this->assertEquals('value1', $csv->data[0]['column1'], 'Data parsed incorrectly with enclosure ' . $enclosure); } + + /** + * Call protected/private method of a class. + * + * @param object &$object Instantiated object that we will run method on. + * @param string $methodName Method name to call + * @param array $parameters Array of parameters to pass into method. + * + * @return mixed Method return. 
+ */ + private function invokeMethod(&$object, $methodName, array $parameters = array()) + { + $reflection = new \ReflectionClass(get_class($object)); + $method = $reflection->getMethod($methodName); + $method->setAccessible(true); + + return $method->invokeArgs($object, $parameters); + } } diff --git a/tests/methods/SaveTest.php b/tests/methods/SaveTest.php index 6e43b97..335844c 100644 --- a/tests/methods/SaveTest.php +++ b/tests/methods/SaveTest.php @@ -49,6 +49,13 @@ class SaveTest extends TestCase $this->saveAndCompare($expected); } + public function testSaveWithNewHeader() { + $this->csv->linefeed = "\n"; + $this->csv->titles = array("NewTitle"); + $expected = "NewTitle\n0444\n5555\n"; + $this->saveAndCompare($expected); + } + public function testSaveWithoutHeader() { $this->csv->linefeed = "\n"; $this->csv->heading = false; diff --git a/tests/methods/UnparseTest.php b/tests/methods/UnparseTest.php new file mode 100644 index 0000000..dbc1f75 --- /dev/null +++ b/tests/methods/UnparseTest.php @@ -0,0 +1,62 @@ +csv = new Csv(); + $this->csv->auto(__DIR__ . '/fixtures/auto-double-enclosure.csv'); + } + + public function testUnparseDefault() { + $expected = "column1,column2\rvalue1,value2\rvalue3,value4\r"; + $this->unparseAndCompare($expected); + } + + public function testUnparseDefaultWithoutHeading(){ + $this->csv->heading = false; + $this->csv->auto(__DIR__ . 
'/fixtures/auto-double-enclosure.csv'); + $expected = "column1,column2\rvalue1,value2\rvalue3,value4\r"; + $this->unparseAndCompare($expected); + + } + + public function testUnparseRenameFields() { + $expected = "C1,C2\rvalue1,value2\rvalue3,value4\r"; + $this->unparseAndCompare($expected, array("C1", "C2")); + } + + public function testReorderFields() { + $expected = "column2,column1\rvalue2,value1\rvalue4,value3\r"; + $this->unparseAndCompare($expected, array("column2", "column1")); + } + + public function testSubsetFields() { + $expected = "column1\rvalue1\rvalue3\r"; + $this->unparseAndCompare($expected, array("column1")); + } + + public function testReorderAndRenameFields() { + $fields = array( + 'column2' => 'C2', + 'column1' => 'C1', + ); + $expected = "C2,C1\rvalue2,value1\rvalue4,value3\r"; + $this->unparseAndCompare($expected, $fields); + } + + private function unparseAndCompare($expected, $fields = array()) { + $str = $this->csv->unparse($this->csv->data, $fields); + $this->assertEquals($expected, $str); + } + +} diff --git a/tests/properties/DefaultValuesPropertiesTest.php b/tests/properties/DefaultValuesPropertiesTest.php index 803afcc..364f7cd 100644 --- a/tests/properties/DefaultValuesPropertiesTest.php +++ b/tests/properties/DefaultValuesPropertiesTest.php @@ -57,7 +57,7 @@ class DefaultValuesPropertiesTest extends TestCase { } public function test_sort_type_default() { - $this->assertNull($this->csv->sort_type); + $this->assertEquals('regular', $this->csv->sort_type); } public function test_delimiter_default() { diff --git a/tests/properties/PublicPropertiesTest.php b/tests/properties/PublicPropertiesTest.php index fbf85d5..ed49354 100644 --- a/tests/properties/PublicPropertiesTest.php +++ b/tests/properties/PublicPropertiesTest.php @@ -3,6 +3,7 @@ namespace ParseCsv\tests\properties; use ParseCsv\Csv; +use ParseCsv\enums\SortEnum; use PHPUnit\Framework\TestCase; class PublicPropertiesTest extends TestCase { @@ -145,4 +146,26 @@ class 
PublicPropertiesTest extends TestCase { $this->assertCount($counter, $this->properties); } + + public function testDefaultSortTypeIsRegular(){ + $this->assertEquals(SortEnum::SORT_TYPE_REGULAR, $this->csv->sort_type); + } + + public function testSetSortType(){ + $this->csv->sort_type = 'numeric'; + $this->assertEquals(SortEnum::SORT_TYPE_NUMERIC, $this->csv->sort_type); + + $this->csv->sort_type = 'string'; + $this->assertEquals(SortEnum::SORT_TYPE_STRING, $this->csv->sort_type); + } + + public function testGetSorting(){ + $this->csv->sort_type = 'numeric'; + $sorting = SortEnum::getSorting($this->csv->sort_type); + $this->assertEquals(SORT_NUMERIC, $sorting); + + $this->csv->sort_type = 'string'; + $sorting = SortEnum::getSorting($this->csv->sort_type); + $this->assertEquals(SORT_STRING, $sorting); + } }