From 70366e308566ce065982fb5a58c28b11b0d4ff46 Mon Sep 17 00:00:00 2001 From: zynode Date: Fri, 11 Apr 2008 18:13:37 +0000 Subject: [PATCH] parseCSV 0.4 beta - Error reporting for files/data which is corrupt or has formatting errors like using double quotes in a field without enclosing quotes. Or not escaping double quotes with a second one. - parse() method does not require input anymore if the "$object->file" property has been set. I'm calling this a beta release due to the heavy modifications to the core parsing logic required for error reporting to work. I have tested the new code quite extensively, and I'm fairly confident that it still parses exactly as it always has. The second reason I'm calling it a beta release is because I'm sure the error reporting code will need more refinements and tweaks to detect more types of errors, as it's only picking two types of syntax errors right now. However, it seems these two are the most common errors that you would be likely to come across. git-svn-id: http://parsecsv-for-php.googlecode.com/svn/trunk@28 339761fc-0c37-0410-822d-8b8cac1f6a97 --- ChangeLog.txt | 29 +++++++++++++++++++++++++++++ examples/_books.csv | 28 ++++++++++++++-------------- parsecsv.lib.php | 23 ++++++++++++----------- 3 files changed, 55 insertions(+), 25 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index 46741e3..7ae63c3 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -1,3 +1,32 @@ +parseCSV 0.4 beta +----------------------------------- +Date: 11-Apr-2008 + +- Error reporting for files/data which is corrupt + or has formatting errors like using double + quotes in a field without enclosing quotes. Or + not escaping double quotes with a second one. + +- parse() method does not require input anymore + if the "$object->file" property has been set. + +I'm calling this a beta release due to the heavy +modifications to the core parsing logic required +for error reporting to work. 
I have tested the +new code quite extensively, and I'm fairly confident +that it still parses exactly as it always has. + +The second reason I'm calling it a beta release +is because I'm sure the error reporting code will +need more refinements and tweaks to detect more +types of errors, as it's only picking two types +of syntax errors right now. However, it seems +these two are the most common errors that you +would be likely to come across. + +----------------------------------- + + parseCSV 0.3.2 ----------------------------------- Date: 1-Apr-2008 diff --git a/examples/_books.csv b/examples/_books.csv index d0394e9..3e4586c 100644 --- a/examples/_books.csv +++ b/examples/_books.csv @@ -1,15 +1,15 @@ -rating,title,author,type,asin,tags,review -0,The Killing Kind,John Connolly,Book,0340771224,,i still haven't had time to read this one... -0,The Third Secret,Steve Berry,Book,0340899263,,need to find time to read this book -3,The Last Templar,Raymond Khoury,Book,0752880705,, -5,The Traveller,John Twelve Hawks,Book,059305430X,, -4,Crisis Four,Andy Mcnab,Book,0345428080,, -5,Prey,Michael Crichton,Book,0007154534,, -3,The Broker (Paperback),John Grisham,Book,0440241588,book johngrisham,"good book, but is slow in the middle" -3,Without Blood (Paperback),Alessandro Baricco,Book,1841955744,, -5,State of Fear (Paperback),Michael Crichton,Book,0061015733,, -4,The Rule of Four (Paperback),Ian Caldwell,Book,0099451956,book bestseller, -4,Deception Point (Paperback),Dan Brown,Book,0671027387,book danbrown bestseller, -5,Digital Fortress : A Thriller (Mass Market Paperback),Dan Brown,Book,0312995423,book danbrown bestseller, -5,Angels & Demons (Mass Market Paperback),Dan Brown,Book,0671027360,book danbrown bestseller, +rating,title,author,type,asin,tags,review +0,The Killing Kind,John Connolly,Book,0340771224,,i still haven't had time to read this one... 
+0,The Third Secret,Steve Berry,Book,0340899263,,need to find time to read this book +3,The Last Templar,Raymond Khoury,Book,0752880705,, +5,The Traveller,John Twelve Hawks,Book,059305430X,, +4,Crisis Four,Andy Mcnab,Book,0345428080,, +5,Prey,Michael Crichton,Book,0007154534,, +3,The Broker (Paperback),John Grisham,Book,0440241588,book johngrisham,"good book, but is slow in the middle" +3,Without Blood (Paperback),Alessandro Baricco,Book,1841955744,, +5,State of Fear (Paperback),Michael Crichton,Book,0061015733,, +4,The Rule of Four (Paperback),Ian Caldwell,Book,0099451956,book bestseller, +4,Deception Point (Paperback),Dan Brown,Book,0671027387,book danbrown bestseller, +5,Digital Fortress : A Thriller (Mass Market Paperback),Dan Brown,Book,0312995423,book danbrown bestseller, +5,Angels & Demons (Mass Market Paperback),Dan Brown,Book,0671027360,book danbrown bestseller, 4,The Da Vinci Code (Hardcover),Dan Brown," Book ",0385504209,book movie danbrown bestseller davinci, \ No newline at end of file diff --git a/parsecsv.lib.php b/parsecsv.lib.php index 3c497f2..d0e3e9d 100644 --- a/parsecsv.lib.php +++ b/parsecsv.lib.php @@ -4,7 +4,7 @@ class parseCSV { /* - Class: parseCSV v0.3.3 beta + Class: parseCSV v0.4 beta http://code.google.com/p/parsecsv-for-php/ @@ -284,7 +284,7 @@ class parseCSV { $ch = $data{$i}; $nch = ( isset($data{$i+1}) ) ? $data{$i+1} : false ; $pch = ( isset($data{$i-1}) ) ? $data{$i-1} : false ; - + // open and closing quotes if ( $ch == $enclosure ) { if ( !$enclosed || $nch != $enclosure ) { @@ -292,7 +292,7 @@ class parseCSV { } elseif ( $enclosed ) { $i++; } - + // end of row } elseif ( ($ch == "\n" && $pch != "\r" || $ch == "\r") && !$enclosed ) { if ( $n >= $search_depth ) { @@ -392,6 +392,7 @@ class parseCSV { $error_col = $col + 1; if ( !isset($this->error_info[$error_row.'-'.$error_col]) ) { $this->error_info[$error_row.'-'.$error_col] = array( + 'type' => 2, 'info' => 'Syntax error found on row '.$error_row.'. 
Non-enclosed fields can not contain double-quotes.', 'row' => $error_row, 'field' => $error_col, @@ -409,15 +410,18 @@ class parseCSV { $enclosed = false; $i = $x; } else { - $this->error = 1; + if ( $this->error < 1 ) { + $this->error = 1; + } $error_row = count($rows) + 1; $error_col = $col + 1; if ( !isset($this->error_info[$error_row.'-'.$error_col]) ) { $this->error_info[$error_row.'-'.$error_col] = array( + 'type' => 1, 'info' => 'Syntax error found on row '.(count($rows) + 1).'. '. - 'Fields containing double quotes must be enclosed with double quotes. '. - 'Additionally, two double quotes must be used within an enclosed field rather than a single one.', + 'A single double-quote was found within an enclosed string. '. + 'Enclosed double-quotes must be escaped with a second double-quote.', 'row' => count($rows) + 1, 'field' => $col + 1, 'field_name' => (!empty($head[$col])) ? $head[$col] : null, @@ -429,18 +433,15 @@ class parseCSV { } else { $enclosed = false; } - + // end of field/row } elseif ( ($ch == $this->delimiter || $ch == "\n" || $ch == "\r") && !$enclosed ) { $key = ( !empty($head[$col]) ) ? $head[$col] : $col ; - if ( $was_enclosed && $current{strlen($current)-1} == $this->enclosure ) { - $current = substr($current, 0, -1); - } $row[$key] = ( $was_enclosed ) ? $current : trim($current) ; $current = ''; $was_enclosed = false; $col++; - + // end of row if ( $ch == "\n" || $ch == "\r" ) { if ( $this->_validate_offset($row_count) && $this->_validate_row_conditions($row, $this->conditions) ) {