diff --git a/ChangeLog.txt b/ChangeLog.txt index 46741e3..7ae63c3 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -1,3 +1,32 @@ +parseCSV 0.4 beta +----------------------------------- +Date: 11-Apr-2008 + +- Error reporting for files/data which is corrupt + or has formatting errors like using double + quotes in a field without enclosing quotes, or + not escaping double quotes with a second one. + +- parse() method does not require input anymore + if the "$object->file" property has been set. + +I'm calling this a beta release due to the heavy +modifications to the core parsing logic required +for error reporting to work. I have tested the +new code quite extensively, and I'm fairly confident +that it still parses exactly as it always has. + +The second reason I'm calling it a beta release +is because I'm sure the error reporting code will +need more refinements and tweaks to detect more +types of errors, as it's only picking up two types +of syntax errors right now. However, it seems +these two are the most common errors that you +would be likely to come across. + +----------------------------------- + + parseCSV 0.3.2 ----------------------------------- Date: 1-Apr-2008 diff --git a/examples/_books.csv b/examples/_books.csv index d0394e9..3e4586c 100644 --- a/examples/_books.csv +++ b/examples/_books.csv @@ -1,15 +1,15 @@ -rating,title,author,type,asin,tags,review -0,The Killing Kind,John Connolly,Book,0340771224,,i still haven't had time to read this one... 
-0,The Third Secret,Steve Berry,Book,0340899263,,need to find time to read this book -3,The Last Templar,Raymond Khoury,Book,0752880705,, -5,The Traveller,John Twelve Hawks,Book,059305430X,, -4,Crisis Four,Andy Mcnab,Book,0345428080,, -5,Prey,Michael Crichton,Book,0007154534,, -3,The Broker (Paperback),John Grisham,Book,0440241588,book johngrisham,"good book, but is slow in the middle" -3,Without Blood (Paperback),Alessandro Baricco,Book,1841955744,, -5,State of Fear (Paperback),Michael Crichton,Book,0061015733,, -4,The Rule of Four (Paperback),Ian Caldwell,Book,0099451956,book bestseller, -4,Deception Point (Paperback),Dan Brown,Book,0671027387,book danbrown bestseller, -5,Digital Fortress : A Thriller (Mass Market Paperback),Dan Brown,Book,0312995423,book danbrown bestseller, -5,Angels & Demons (Mass Market Paperback),Dan Brown,Book,0671027360,book danbrown bestseller, +rating,title,author,type,asin,tags,review +0,The Killing Kind,John Connolly,Book,0340771224,,i still haven't had time to read this one... 
+0,The Third Secret,Steve Berry,Book,0340899263,,need to find time to read this book +3,The Last Templar,Raymond Khoury,Book,0752880705,, +5,The Traveller,John Twelve Hawks,Book,059305430X,, +4,Crisis Four,Andy Mcnab,Book,0345428080,, +5,Prey,Michael Crichton,Book,0007154534,, +3,The Broker (Paperback),John Grisham,Book,0440241588,book johngrisham,"good book, but is slow in the middle" +3,Without Blood (Paperback),Alessandro Baricco,Book,1841955744,, +5,State of Fear (Paperback),Michael Crichton,Book,0061015733,, +4,The Rule of Four (Paperback),Ian Caldwell,Book,0099451956,book bestseller, +4,Deception Point (Paperback),Dan Brown,Book,0671027387,book danbrown bestseller, +5,Digital Fortress : A Thriller (Mass Market Paperback),Dan Brown,Book,0312995423,book danbrown bestseller, +5,Angels & Demons (Mass Market Paperback),Dan Brown,Book,0671027360,book danbrown bestseller, 4,The Da Vinci Code (Hardcover),Dan Brown," Book ",0385504209,book movie danbrown bestseller davinci, \ No newline at end of file diff --git a/parsecsv.lib.php b/parsecsv.lib.php index 3c497f2..d0e3e9d 100644 --- a/parsecsv.lib.php +++ b/parsecsv.lib.php @@ -4,7 +4,7 @@ class parseCSV { /* - Class: parseCSV v0.3.3 beta + Class: parseCSV v0.4 beta http://code.google.com/p/parsecsv-for-php/ @@ -284,7 +284,7 @@ class parseCSV { $ch = $data{$i}; $nch = ( isset($data{$i+1}) ) ? $data{$i+1} : false ; $pch = ( isset($data{$i-1}) ) ? $data{$i-1} : false ; - + // open and closing quotes if ( $ch == $enclosure ) { if ( !$enclosed || $nch != $enclosure ) { @@ -292,7 +292,7 @@ class parseCSV { } elseif ( $enclosed ) { $i++; } - + // end of row } elseif ( ($ch == "\n" && $pch != "\r" || $ch == "\r") && !$enclosed ) { if ( $n >= $search_depth ) { @@ -392,6 +392,7 @@ class parseCSV { $error_col = $col + 1; if ( !isset($this->error_info[$error_row.'-'.$error_col]) ) { $this->error_info[$error_row.'-'.$error_col] = array( + 'type' => 2, 'info' => 'Syntax error found on row '.$error_row.'. 
Non-enclosed fields can not contain double-quotes.', 'row' => $error_row, 'field' => $error_col, @@ -409,15 +410,18 @@ class parseCSV { $enclosed = false; $i = $x; } else { - $this->error = 1; + if ( $this->error < 1 ) { + $this->error = 1; + } $error_row = count($rows) + 1; $error_col = $col + 1; if ( !isset($this->error_info[$error_row.'-'.$error_col]) ) { $this->error_info[$error_row.'-'.$error_col] = array( + 'type' => 1, 'info' => 'Syntax error found on row '.(count($rows) + 1).'. '. - 'Fields containing double quotes must be enclosed with double quotes. '. - 'Additionally, two double quotes must be used within an enclosed field rather than a single one.', + 'A single double-quote was found within an enclosed string. '. + 'Enclosed double-quotes must be escaped with a second double-quote.', 'row' => count($rows) + 1, 'field' => $col + 1, 'field_name' => (!empty($head[$col])) ? $head[$col] : null, @@ -429,18 +433,15 @@ class parseCSV { } else { $enclosed = false; } - + // end of field/row } elseif ( ($ch == $this->delimiter || $ch == "\n" || $ch == "\r") && !$enclosed ) { $key = ( !empty($head[$col]) ) ? $head[$col] : $col ; - if ( $was_enclosed && $current{strlen($current)-1} == $this->enclosure ) { - $current = substr($current, 0, -1); - } $row[$key] = ( $was_enclosed ) ? $current : trim($current) ; $current = ''; $was_enclosed = false; $col++; - + // end of row if ( $ch == "\n" || $ch == "\r" ) { if ( $this->_validate_offset($row_count) && $this->_validate_row_conditions($row, $this->conditions) ) {