From f6bd414ae7b5dc1904f5bea9aa2ec89ac052179c Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Sun, 18 Feb 2018 00:09:22 +0100 Subject: [PATCH 1/4] init enum for data type --- src/enums/DatatypeEnum.php | 113 +++++++++++++++++++++++++++++++ src/extensions/DatatypeTrait.php | 43 ++---------- 2 files changed, 117 insertions(+), 39 deletions(-) create mode 100644 src/enums/DatatypeEnum.php diff --git a/src/enums/DatatypeEnum.php b/src/enums/DatatypeEnum.php new file mode 100644 index 0000000..83d64d1 --- /dev/null +++ b/src/enums/DatatypeEnum.php @@ -0,0 +1,113 @@ + null, + self::TYPE_FLOAT => 'isValidFloat', + self::TYPE_INT => 'isValidInteger', + self::TYPE_BOOL => 'isValidBoolean', + self::TYPE_DATE => 'isValidDate' + ); + + /** + * Checks data type for given string. + * + * @param $value + * + * @return bool|string + */ + public static function getValidTypeFromSample($value){ + $value = trim((string) $value); + + if (empty($value)){ + return false; + } + + foreach (self::$validators as $type => $validator){ + if ($validator === null){ + continue; + } + + if (method_exists(self, $validator)){ + call_user_func($validator($value)); + return $type; + } + + return self::__DEFAULT; + } + } + + /** + * Check if string is float value. + * + * @param $value + * + * @return false|int + */ + private static function isValidFloat($value) { + return preg_match(self::REGEX_FLOAT, $value); + } + + /** + * Check if string is integer value. + * + * @param $value + * + * @return false|int + */ + private static function isValidInteger($value) { + return preg_match(self::REGEX_INT, $value); + } + + /** + * Check if string is boolean. + * + * @param $value + * + * @return false|int + */ + private static function isValidBoolean($value) { + return preg_match(self::REGEX_BOOL, $value); + } + + /** + * Check if string is date. + * + * @param $value + * + * @return false|int + */ + private static function isValidDate($value) { + return (bool) strtotime($value); + } +} diff --git a/src/extensions/DatatypeTrait.php b/src/extensions/DatatypeTrait.php index e6b1246..aff0c9c 100644 --- a/src/extensions/DatatypeTrait.php +++ b/src/extensions/DatatypeTrait.php @@ -12,42 +12,6 @@ trait DatatypeTrait { */ public $data_types = []; - /** - * Check data type - * Check for possible data types for one field value string. - * - * @access private - * - * @param string $value cell value - * - * @return string - */ - private function getDatatypeFromString($value) { - $value = trim((string) $value); - - if (empty($value)) { - return 'unknown'; - } - - if (preg_match('/^(?i:true|false)$/', $value)) { - return 'boolean'; - } - - if (preg_match('/^[-+]?[0-9]\d*$/', $value)) { - return 'integer'; - } - - if (preg_match('/^[+-]?([0-9]*[.])?([0-9]|[.][0-9])+$/', $value)) { - return 'float'; - } - - if ((bool) strtotime($value)) { - return 'date'; - } - - return 'string'; - } - /** * Check data type for one column. * Check for most commonly data type for one column. @@ -59,7 +23,7 @@ trait DatatypeTrait { * @return string|false */ private function getMostFrequentDataypeForColumn($datatypes) { - unset($datatypes['unknown']); + array_filter($datatypes); $typesFreq = array_count_values($datatypes); arsort($typesFreq); @@ -85,13 +49,14 @@ trait DatatypeTrait { $result = []; foreach ($this->titles as $cName) { $column = array_column($this->data, $cName); - $cDatatypes = array_map([$this, 'getDatatypeFromString'], $column); + + $cDatatypes = array_map('CSV\enums\DatatypeEnum::getValidTypeFromSample', $column); $result[$cName] = $this->getMostFrequentDataypeForColumn($cDatatypes); } $this->data_types = $result; - return !empty($this->data_types) ? $this->data_types : false; + return !empty($this->data_types) ? $this->data_types : []; } } From 48cec20f4f5ef010f3b73385cd32dbaa3dda23c3 Mon Sep 17 00:00:00 2001 From: Susann Sgorzaly Date: Sun, 18 Feb 2018 12:45:33 +0100 Subject: [PATCH 2/4] namespace change --- src/enums/DatatypeEnum.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/enums/DatatypeEnum.php b/src/enums/DatatypeEnum.php index 83d64d1..f389a5e 100644 --- a/src/enums/DatatypeEnum.php +++ b/src/enums/DatatypeEnum.php @@ -1,10 +1,10 @@ Date: Mon, 19 Feb 2018 12:02:48 +0100 Subject: [PATCH 3/4] corrected datatype count --- src/enums/DatatypeEnum.php | 38 ++++++++++++++++++-------------- src/extensions/DatatypeTrait.php | 14 ++++++++++-- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/src/enums/DatatypeEnum.php b/src/enums/DatatypeEnum.php index f389a5e..77ab684 100644 --- a/src/enums/DatatypeEnum.php +++ b/src/enums/DatatypeEnum.php @@ -5,8 +5,11 @@ namespace ParseCsv\enums; * Class DatatypeEnum * * @package ParseCsv\enums + * + * todo: needs a basic parent enum class for error handling. */ -class DatatypeEnum extends SplEnum { +class DatatypeEnum +{ const __DEFAULT = self::TYPE_STRING; @@ -33,16 +36,16 @@ class DatatypeEnum extends SplEnum { */ private static $validators = array( self::TYPE_STRING => null, - self::TYPE_FLOAT => 'isValidFloat', self::TYPE_INT => 'isValidInteger', self::TYPE_BOOL => 'isValidBoolean', + self::TYPE_FLOAT => 'isValidFloat', self::TYPE_DATE => 'isValidDate' ); /** * Checks data type for given string. * - * @param $value + * @param string $value * * @return bool|string */ @@ -58,9 +61,10 @@ class DatatypeEnum extends SplEnum { continue; } - if (method_exists(self, $validator)){ - call_user_func($validator($value)); - return $type; + if (method_exists(__CLASS__, $validator)){ + if (get_class()::$validator($value)) { + return $type; + } } return self::__DEFAULT; @@ -70,42 +74,42 @@ class DatatypeEnum extends SplEnum { /** * Check if string is float value. * - * @param $value + * @param string $value * - * @return false|int + * @return bool */ private static function isValidFloat($value) { - return preg_match(self::REGEX_FLOAT, $value); + return (bool) preg_match(self::REGEX_FLOAT, $value); } /** * Check if string is integer value. * - * @param $value + * @param string $value * - * @return false|int + * @return bool */ private static function isValidInteger($value) { - return preg_match(self::REGEX_INT, $value); + return (bool) preg_match(self::REGEX_INT, $value); } /** * Check if string is boolean. * - * @param $value + * @param string $value * - * @return false|int + * @return bool */ private static function isValidBoolean($value) { - return preg_match(self::REGEX_BOOL, $value); + return (bool) preg_match(self::REGEX_BOOL, $value); } /** * Check if string is date. * - * @param $value + * @param string $value * - * @return false|int + * @return bool */ private static function isValidDate($value) { return (bool) strtotime($value); diff --git a/src/extensions/DatatypeTrait.php b/src/extensions/DatatypeTrait.php index 61464f7..8c9ed37 100644 --- a/src/extensions/DatatypeTrait.php +++ b/src/extensions/DatatypeTrait.php @@ -25,7 +25,17 @@ trait DatatypeTrait { private function getMostFrequentDataypeForColumn($datatypes) { array_filter($datatypes); - $typesFreq = array_count_values($datatypes); + foreach ($datatypes as $value) { + echo gettype($value), "\n"; + } + + // workaround because array_count_values($datatypes) does not work anymore :-( + foreach ($datatypes as $type) { + $ids = array_keys($datatypes, $type); + $typesFreq[$type] = count($ids); + + $datatypes = array_diff_key($datatypes, array_flip($ids)); + } arsort($typesFreq); reset($typesFreq); @@ -50,7 +60,7 @@ trait DatatypeTrait { foreach ($this->titles as $cName) { $column = array_column($this->data, $cName); - $cDatatypes = array_map('CSV\enums\DatatypeEnum::getValidTypeFromSample', $column); + $cDatatypes = array_map('ParseCsv\enums\DatatypeEnum::getValidTypeFromSample', $column); $result[$cName] = $this->getMostFrequentDataypeForColumn($cDatatypes); } From 2c7c5525153f88e4de2a748add52da60a1ac637d Mon Sep 17 00:00:00 2001 From: susgo Date: Mon, 19 Feb 2018 12:07:32 +0100 Subject: [PATCH 4/4] Update DatatypeTrait.php --- src/extensions/DatatypeTrait.php | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/extensions/DatatypeTrait.php b/src/extensions/DatatypeTrait.php index 8c9ed37..50527e1 100644 --- a/src/extensions/DatatypeTrait.php +++ b/src/extensions/DatatypeTrait.php @@ -23,11 +23,7 @@ trait DatatypeTrait { * @return string|false */ private function getMostFrequentDataypeForColumn($datatypes) { - array_filter($datatypes); - - foreach ($datatypes as $value) { - echo gettype($value), "\n"; - } + array_filter($datatypes); // workaround because array_count_values($datatypes) does not work anymore :-( foreach ($datatypes as $type) {