mirror of
https://github.com/parsecsv/parsecsv-for-php.git
synced 2026-02-19 08:36:39 +00:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
70366e3085 | ||
|
|
2dfd35b988 | ||
|
|
ae00f949f0 | ||
|
|
4e76da5eff | ||
|
|
7762e71316 |
@@ -1,3 +1,71 @@
|
||||
parseCSV 0.4 beta
|
||||
-----------------------------------
|
||||
Date: 11-Apr-2008
|
||||
|
||||
- Error reporting for files/data which is corrupt
|
||||
or has formatting errors like using double
|
||||
quotes in a field without enclosing quotes. Or
|
||||
not escaping double quotes with a second one.
|
||||
|
||||
- parse() method does not require input anymore
|
||||
if the "$object->file" property has been set.
|
||||
|
||||
I'm calling this a beta release due to the heavy
|
||||
modifications to the core parsing logic required
|
||||
for error reporting to work. I have tested the
|
||||
new code quite extensively, and I'm fairly confident
|
||||
that it still parses exactly as it always has.
|
||||
|
||||
The second reason I'm calling it a beta release
|
||||
is because I'm sure the error reporting code will
|
||||
need more refinements and tweaks to detect more
|
||||
types of errors, as it's only picking two types
|
||||
of syntax errors right now. However, it seems
|
||||
these two are the most common errors that you
|
||||
would be likely to come across.
|
||||
|
||||
-----------------------------------
|
||||
|
||||
|
||||
parseCSV 0.3.2
|
||||
-----------------------------------
|
||||
Date: 1-Apr-2008
|
||||
|
||||
This is primarily a bug-fix release for a critical
|
||||
bug which was brought to my attention.
|
||||
|
||||
- Fixed a critical bug in conditions parsing which
|
||||
would generate corrupt matching patterns causing
|
||||
the condition(s) to not work at all in some
|
||||
situations.
|
||||
|
||||
- Fixed a small code error which would cause PHP to
|
||||
generate an invalid offset notice when zero-length
|
||||
values were fed into the unparse() method to
|
||||
generate CSV data from an array.
|
||||
|
||||
Notice: If you have been using the "parsecsv-stable"
|
||||
branch as an external in any of your projects,
|
||||
please use the "stable/parsecsv" branch from this
|
||||
point on as I will eventually remove the former due
|
||||
to its stupid naming.
|
||||
|
||||
-----------------------------------
|
||||
|
||||
|
||||
parseCSV 0.3.1
|
||||
-----------------------------------
|
||||
Date: 1-Sep-2007
|
||||
|
||||
- Small change to default output settings to
|
||||
conform with RFC 4180 (http://rfc.net/rfc4180.html).
|
||||
Only the LF (line feed) character was used
|
||||
by default to separate rows, rather than
|
||||
CRLF (carriage return & line feed).
|
||||
|
||||
-----------------------------------
|
||||
|
||||
|
||||
parseCSV 0.3.0
|
||||
-----------------------------------
|
||||
Date: 9-Aug-2007
|
||||
@@ -18,6 +86,9 @@ Date: 9-Aug-2007
|
||||
- Minor changes and optimizations, and a few
|
||||
spelling corrections. Oops :)
|
||||
|
||||
- Included more complex code examples in the
|
||||
parseCSV download.
|
||||
|
||||
-----------------------------------
|
||||
|
||||
|
||||
|
||||
@@ -13,7 +13,11 @@ $csv = new parseCSV();
|
||||
# Parse '_books.csv' using automatic delimiter detection...
|
||||
$csv->auto('_books.csv');
|
||||
|
||||
# ...or if you know the delimiter, use the parse() function.
|
||||
# ...or if you know the delimiter, set the delimiter character
|
||||
# if it's not the default comma...
|
||||
// $csv->delimiter = "\t"; # tab delimited
|
||||
|
||||
# ...and then use the parse() function.
|
||||
// $csv->parse('_books.csv');
|
||||
|
||||
|
||||
|
||||
100
parsecsv.lib.php
100
parsecsv.lib.php
@@ -4,7 +4,7 @@ class parseCSV {
|
||||
|
||||
/*
|
||||
|
||||
Class: parseCSV v0.3.0
|
||||
Class: parseCSV v0.4 beta
|
||||
http://code.google.com/p/parsecsv-for-php/
|
||||
|
||||
|
||||
@@ -123,7 +123,7 @@ class parseCSV {
|
||||
var $output_encoding = 'ISO-8859-1';
|
||||
|
||||
# used by unparse(), save(), and output() functions
|
||||
var $linefeed = "\n";
|
||||
var $linefeed = "\r\n";
|
||||
|
||||
# only used by output() function
|
||||
var $output_delimiter = ',';
|
||||
@@ -140,6 +140,19 @@ class parseCSV {
|
||||
# loaded file contents
|
||||
var $file_data;
|
||||
|
||||
# error while parsing input data
|
||||
# 0 = No errors found. Everything should be fine :)
|
||||
# 1 = Hopefully correctable syntax error was found.
|
||||
# 2 = Enclosure character (double quote by default)
|
||||
# was found in non-enclosed field. This means
|
||||
# the file is either corrupt, or does not use
|
||||
# standard CSV formatting. Please validate
|
||||
# the parsed data yourself.
|
||||
var $error = 0;
|
||||
|
||||
# detailed error info
|
||||
var $error_info = array();
|
||||
|
||||
# array of field values in data parsed
|
||||
var $titles = array();
|
||||
|
||||
@@ -170,6 +183,7 @@ class parseCSV {
|
||||
* @return nothing
|
||||
*/
|
||||
function parse ($input = null, $offset = null, $limit = null, $conditions = null) {
|
||||
if ( $input === null ) $input = $this->file;
|
||||
if ( !empty($input) ) {
|
||||
if ( $offset !== null ) $this->offset = $offset;
|
||||
if ( $limit !== null ) $this->limit = $limit;
|
||||
@@ -272,12 +286,12 @@ class parseCSV {
|
||||
$pch = ( isset($data{$i-1}) ) ? $data{$i-1} : false ;
|
||||
|
||||
// open and closing quotes
|
||||
if ( $ch == $enclosure && (!$enclosed || $nch != $enclosure) ) {
|
||||
if ( $ch == $enclosure ) {
|
||||
if ( !$enclosed || $nch != $enclosure ) {
|
||||
$enclosed = ( $enclosed ) ? false : true ;
|
||||
|
||||
// inline quotes
|
||||
} elseif ( $ch == $enclosure && $enclosed ) {
|
||||
} elseif ( $enclosed ) {
|
||||
$i++;
|
||||
}
|
||||
|
||||
// end of row
|
||||
} elseif ( ($ch == "\n" && $pch != "\r" || $ch == "\r") && !$enclosed ) {
|
||||
@@ -311,13 +325,12 @@ class parseCSV {
|
||||
|
||||
// capture most probable delimiter
|
||||
ksort($filtered);
|
||||
$delimiter = reset($filtered);
|
||||
$this->delimiter = $delimiter;
|
||||
$this->delimiter = reset($filtered);
|
||||
|
||||
// parse data
|
||||
if ( $parse ) $this->data = $this->parse_string();
|
||||
|
||||
return $delimiter;
|
||||
return $this->delimiter;
|
||||
|
||||
}
|
||||
|
||||
@@ -349,6 +362,8 @@ class parseCSV {
|
||||
} else return false;
|
||||
}
|
||||
|
||||
$white_spaces = str_replace($this->delimiter, '', " \t\x0B\0");
|
||||
|
||||
$rows = array();
|
||||
$row = array();
|
||||
$row_count = 0;
|
||||
@@ -365,22 +380,66 @@ class parseCSV {
|
||||
$nch = ( isset($data{$i+1}) ) ? $data{$i+1} : false ;
|
||||
$pch = ( isset($data{$i-1}) ) ? $data{$i-1} : false ;
|
||||
|
||||
// open and closing quotes
|
||||
if ( $ch == $this->enclosure && (!$enclosed || $nch != $this->enclosure) ) {
|
||||
$enclosed = ( $enclosed ) ? false : true ;
|
||||
if ( $enclosed ) $was_enclosed = true;
|
||||
|
||||
// inline quotes
|
||||
} elseif ( $ch == $this->enclosure && $enclosed ) {
|
||||
// open/close quotes, and inline quotes
|
||||
if ( $ch == $this->enclosure ) {
|
||||
if ( !$enclosed ) {
|
||||
if ( ltrim($current, $white_spaces) == '' ) {
|
||||
$enclosed = true;
|
||||
$was_enclosed = true;
|
||||
} else {
|
||||
$this->error = 2;
|
||||
$error_row = count($rows) + 1;
|
||||
$error_col = $col + 1;
|
||||
if ( !isset($this->error_info[$error_row.'-'.$error_col]) ) {
|
||||
$this->error_info[$error_row.'-'.$error_col] = array(
|
||||
'type' => 2,
|
||||
'info' => 'Syntax error found on row '.$error_row.'. Non-enclosed fields can not contain double-quotes.',
|
||||
'row' => $error_row,
|
||||
'field' => $error_col,
|
||||
'field_name' => (!empty($head[$col])) ? $head[$col] : null,
|
||||
);
|
||||
}
|
||||
$current .= $ch;
|
||||
}
|
||||
} elseif ($nch == $this->enclosure) {
|
||||
$current .= $ch;
|
||||
$i++;
|
||||
} elseif ( $nch != $this->delimiter && $nch != "\r" && $nch != "\n" ) {
|
||||
for ( $x=($i+1); isset($data{$x}) && ltrim($data{$x}, $white_spaces) == ''; $x++ ) {}
|
||||
if ( $data{$x} == $this->delimiter ) {
|
||||
$enclosed = false;
|
||||
$i = $x;
|
||||
} else {
|
||||
if ( $this->error < 1 ) {
|
||||
$this->error = 1;
|
||||
}
|
||||
$error_row = count($rows) + 1;
|
||||
$error_col = $col + 1;
|
||||
if ( !isset($this->error_info[$error_row.'-'.$error_col]) ) {
|
||||
$this->error_info[$error_row.'-'.$error_col] = array(
|
||||
'type' => 1,
|
||||
'info' =>
|
||||
'Syntax error found on row '.(count($rows) + 1).'. '.
|
||||
'A single double-quote was found within an enclosed string. '.
|
||||
'Enclosed double-quotes must be escaped with a second double-quote.',
|
||||
'row' => count($rows) + 1,
|
||||
'field' => $col + 1,
|
||||
'field_name' => (!empty($head[$col])) ? $head[$col] : null,
|
||||
);
|
||||
}
|
||||
$current .= $ch;
|
||||
$enclosed = false;
|
||||
}
|
||||
} else {
|
||||
$enclosed = false;
|
||||
}
|
||||
|
||||
// end of field/row
|
||||
} elseif ( ($ch == $this->delimiter || ($ch == "\n" && $pch != "\r") || $ch == "\r") && !$enclosed ) {
|
||||
if ( !$was_enclosed ) $current = trim($current);
|
||||
} elseif ( ($ch == $this->delimiter || $ch == "\n" || $ch == "\r") && !$enclosed ) {
|
||||
$key = ( !empty($head[$col]) ) ? $head[$col] : $col ;
|
||||
$row[$key] = $current;
|
||||
$row[$key] = ( $was_enclosed ) ? $current : trim($current) ;
|
||||
$current = '';
|
||||
$was_enclosed = false;
|
||||
$col++;
|
||||
|
||||
// end of row
|
||||
@@ -405,6 +464,7 @@ class parseCSV {
|
||||
if ( $this->sort_by === null && $this->limit !== null && count($rows) == $this->limit ) {
|
||||
$i = $strlen;
|
||||
}
|
||||
if ( $ch == "\r" && $nch == "\n" ) $i++;
|
||||
}
|
||||
|
||||
// append character to current field
|
||||
@@ -601,12 +661,14 @@ class parseCSV {
|
||||
* @return Processed value
|
||||
*/
|
||||
function _enclose_value ($value = null) {
|
||||
if ( $value !== null && $value != '' ) {
|
||||
$delimiter = preg_quote($this->delimiter, '/');
|
||||
$enclosure = preg_quote($this->enclosure, '/');
|
||||
if ( preg_match("/".$delimiter."|".$enclosure."|\n|\r/i", $value) || ($value{0} == ' ' || substr($value, -1) == ' ') ) {
|
||||
$value = str_replace($this->enclosure, $this->enclosure.$this->enclosure, $value);
|
||||
$value = $this->enclosure.$value.$this->enclosure;
|
||||
}
|
||||
}
|
||||
return $value;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user