15 Commits

Author SHA1 Message Date
Fonata
f2b0aecd12 docs: remove Travis from README; make running act locally easier 2021-11-07 13:36:38 +01:00
Fonata
5d4643b201 Allow _guess_delimiter to work with a single row of data
Fix #206
2021-11-07 13:36:38 +01:00
d5606f8b2a ci(travis): remove .travis.yml config file
If we're switching to GitHub Actions, there's no need to keep the
Travis-CI config file around.
2021-11-07 13:36:38 +01:00
8cebcbd9bb ci(github): add GitHub Actions CI workflow
Can be tested locally via the local-ci make target, which requires
Docker and act (https://github.com/nektos/act).
2021-11-07 13:36:38 +01:00
518f5081fb chore: improve editorconfig for YAML and Makefile 2021-11-07 13:36:38 +01:00
Fonata
a28fc6ab0a refactor: simplify code without changing behavior 2021-11-07 13:36:38 +01:00
Fonata
009820d190 Explicit email address for security problems 2021-06-20 23:25:35 +02:00
Fonata
facdf1c06c Set release date 2021-06-20 23:25:26 +02:00
Fonata
ee13c17157 Apply PhpStorm source code formatting 2021-06-20 23:21:46 +02:00
Fonata
99daaa7235 Bugfix: $csv->parseFile now sets $this->data
This adds consistency because $csv->parse() does the same.

Fix #200
Fix #201
2021-06-20 23:21:46 +02:00
Fonata
05826c2bbf Make return value of parseFile() explicit as false if parsing failed
In practice this changes nothing because $this->file_data would typically be empty.
The only exception is if the object was reused from a previous parsing operation.
2021-06-20 23:21:46 +02:00
Fonata
731900effe DocBlock types: Use Psalm notation for 2-dimensional array 2021-06-20 23:21:46 +02:00
Fonata
913c3b1b94 Source code comments: add more information to DocBlocks 2021-06-20 23:21:46 +02:00
Fonata
96b2784d3c Source code comments: don’t repeat the field we are describing
The motivation for this change is how the PhpStorm IDE displays help texts.
Because a line break is shown as a space in the tooltips/help hovers, the
previous version was harder to read than after this commit.
2021-06-20 23:21:46 +02:00
Fonata
be01bc9ae4 README.md: Fix incorrect constructor usage 2021-05-03 21:26:56 +02:00
9 changed files with 179 additions and 116 deletions

View File

@@ -14,5 +14,8 @@ insert_final_newline = true
[composer.json] [composer.json]
indent_size = 4 indent_size = 4
[.travis.yml] [Makefile]
indent_style = tab
[*.yml,*.yaml]
indent_size = 2 indent_size = 2

38
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,38 @@
---
name: CI
on:
push:
jobs:
test:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
php_version:
- "7.4"
- "7.3"
- "7.2"
- "7.1"
steps:
- uses: actions/checkout@v2
- uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php_version }}
env:
COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Get composer cache directory
id: composer-cache
run: echo "::set-output name=dir::$(composer config cache-files-dir)"
- name: Cache composer dependencies
uses: actions/cache@v2
with:
path: ${{ steps.composer-cache.outputs.dir }}
key: ${{ runner.os }}-composer-${{ hashFiles('**/composer.lock') }}
restore-keys: ${{ runner.os }}-composer-
- name: Install dependencies
run: composer update
- name: Validate dependencies
run: composer validate
- name: Run tests
run: vendor/bin/phpunit --configuration tests/phpunit.xml

View File

@@ -1,24 +0,0 @@
dist: trusty
language: php
dist: trusty
php:
- '7.4'
- '7.3'
- '7.2'
- '7.1'
before_install:
- composer update
script:
- composer validate
- vendor/bin/phpunit --version
- vendor/bin/phpunit --configuration tests/phpunit.xml
notifications:
email:
recipients:
- will.knauss@gmail.com
on_success: never
on_failure: always

View File

@@ -1,3 +1,16 @@
ParseCSV 1.3.1
-----------------------------------
Date: 20-Jun-2021
Bugfix:
- `parseFile()` will now set `$csv->data`.
Until now, the parsed data was only returned.
This adds consistency with `$csv->parse()`
for the following operations on the object.
-----------------------------------
ParseCSV 1.3.0 ParseCSV 1.3.0
----------------------------------- -----------------------------------
Date: 14-Apr-2021 Date: 14-Apr-2021

View File

@@ -11,5 +11,21 @@ phpunit-dep:
exit 1 \ exit 1 \
) )
# Requires:
# - Docker: https://docker.com
# - act: https://github.com/nektos/act
local-ci:
ifeq (, $(shell which act))
define ACT_ERROR
Consider running the following to install 'act':
curl https://raw.githubusercontent.com/nektos/act/master/install.sh | sudo bash
The dependency 'act' was not found
endef
$(error ${ACT_ERROR})
endif
act -P ubuntu-latest=shivammathur/node:latest -W .github/workflows/ci.yml
.SILENT: .SILENT:
.PHONY: test phpunit-dep .PHONY: test phpunit-dep local-ci

View File

@@ -53,14 +53,7 @@ To use ParseCSV, you then have to add a `require 'parsecsv.lib.php';` line.
## Example Usage ## Example Usage
**General parsing** **Parse a tab-delimited CSV file with encoding conversion**
```php
$csv = new \ParseCsv\Csv('data.csv');
print_r($csv->data);
```
**Tab delimited, and encoding conversion**
```php ```php
$csv = new \ParseCsv\Csv(); $csv = new \ParseCsv\Csv();
@@ -70,7 +63,7 @@ $csv->parseFile('data.tsv');
print_r($csv->data); print_r($csv->data);
``` ```
**Auto-detect delimiter character** **Auto-detect field delimiter character**
```php ```php
$csv = new \ParseCsv\Csv(); $csv = new \ParseCsv\Csv();
@@ -152,6 +145,8 @@ $csv->save('data.csv', array(array('1986', 'Home', 'Nowhere', '')), /* append */
**Convert 2D array to CSV data and send headers to browser to treat output as **Convert 2D array to CSV data and send headers to browser to treat output as
a file and download it** a file and download it**
Your web app users would call this an export.
```php ```php
$csv = new \ParseCsv\Csv(); $csv = new \ParseCsv\Csv();
$csv->linefeed = "\n"; $csv->linefeed = "\n";
@@ -170,8 +165,16 @@ composer install
composer run test composer run test
```` ````
When pushing code to GitHub, tests will be executed using Travis CI. The relevant configuration is in the When pushing code to GitHub, tests will be executed using GitHub Actions. The relevant configuration is in the
file `.travis.yml`. file `.github/workflows/ci.yml`. To run the `test` action locally, you can execute the following command:
````bash
make local-ci
````
## Security
If you discover any security-related issues, please email ParseCsv@blaeul.de instead of using GitHub issues.
## Credits ## Credits

View File

@@ -49,7 +49,7 @@ class Csv {
*/ */
/** /**
* Heading * Header row:
* Use first line/entry as field names * Use first line/entry as field names
* *
* @var bool * @var bool
@@ -57,7 +57,6 @@ class Csv {
public $heading = true; public $heading = true;
/** /**
* Fields
* Override field names * Override field names
* *
* @var array * @var array
@@ -65,7 +64,6 @@ class Csv {
public $fields = array(); public $fields = array();
/** /**
* Sort By
* Sort CSV by this field * Sort CSV by this field
* *
* @var string|null * @var string|null
@@ -73,15 +71,13 @@ class Csv {
public $sort_by = null; public $sort_by = null;
/** /**
* Sort Reverse * Reverse the sort direction
* Reverse the sort function
* *
* @var bool * @var bool
*/ */
public $sort_reverse = false; public $sort_reverse = false;
/** /**
* Sort Type
* Sort behavior passed to sort methods * Sort behavior passed to sort methods
* *
* regular = SORT_REGULAR * regular = SORT_REGULAR
@@ -93,31 +89,34 @@ class Csv {
public $sort_type = SortEnum::SORT_TYPE_REGULAR; public $sort_type = SortEnum::SORT_TYPE_REGULAR;
/** /**
* Delimiter * Field delimiter character
* Delimiter character
* *
* @var string * @var string
*/ */
public $delimiter = ','; public $delimiter = ',';
/** /**
* Enclosure
* Enclosure character * Enclosure character
* *
* This is useful for cell values that are either multi-line
* or contain the field delimiter character.
*
* @var string * @var string
*/ */
public $enclosure = '"'; public $enclosure = '"';
/** /**
* Enclose All * Force enclosing all columns.
* Force enclosing all columns *
* If false, only cells that are either multi-line or
* contain the field delimiter character are enclosed
* in the $enclosure char.
* *
* @var bool * @var bool
*/ */
public $enclose_all = false; public $enclose_all = false;
/** /**
* Conditions
* Basic SQL-Like conditions for row matching * Basic SQL-Like conditions for row matching
* *
* @var string|null * @var string|null
@@ -125,7 +124,6 @@ class Csv {
public $conditions = null; public $conditions = null;
/** /**
* Offset
* Number of rows to ignore from beginning of data. If present, the heading * Number of rows to ignore from beginning of data. If present, the heading
* row is also counted (if $this->heading == true). In other words, * row is also counted (if $this->heading == true). In other words,
* $offset == 1 and $offset == 0 have the same meaning in that situation. * $offset == 1 and $offset == 0 have the same meaning in that situation.
@@ -135,7 +133,6 @@ class Csv {
public $offset = null; public $offset = null;
/** /**
* Limit
* Limits the number of returned rows to the specified amount * Limits the number of returned rows to the specified amount
* *
* @var int|null * @var int|null
@@ -143,7 +140,6 @@ class Csv {
public $limit = null; public $limit = null;
/** /**
* Auto Depth
* Number of rows to analyze when attempting to auto-detect delimiter * Number of rows to analyze when attempting to auto-detect delimiter
* *
* @var int * @var int
@@ -151,7 +147,6 @@ class Csv {
public $auto_depth = 15; public $auto_depth = 15;
/** /**
* Auto Non Chars
* Characters that should be ignored when attempting to auto-detect delimiter * Characters that should be ignored when attempting to auto-detect delimiter
* *
* @var string * @var string
@@ -159,7 +154,6 @@ class Csv {
public $auto_non_chars = "a-zA-Z0-9\n\r"; public $auto_non_chars = "a-zA-Z0-9\n\r";
/** /**
* Auto Preferred
* preferred delimiter characters, only used when all filtering method * preferred delimiter characters, only used when all filtering method
* returns multiple possible delimiters (happens very rarely) * returns multiple possible delimiters (happens very rarely)
* *
@@ -168,15 +162,14 @@ class Csv {
public $auto_preferred = ",;\t.:|"; public $auto_preferred = ",;\t.:|";
/** /**
* Convert Encoding
* Should we convert the CSV character encoding? * Should we convert the CSV character encoding?
* Used for both parse and unparse operations.
* *
* @var bool * @var bool
*/ */
public $convert_encoding = false; public $convert_encoding = false;
/** /**
* Input Encoding
* Set the input encoding * Set the input encoding
* *
* @var string * @var string
@@ -184,7 +177,6 @@ class Csv {
public $input_encoding = 'ISO-8859-1'; public $input_encoding = 'ISO-8859-1';
/** /**
* Output Encoding
* Set the output encoding * Set the output encoding
* *
* @var string * @var string
@@ -202,15 +194,14 @@ class Csv {
public $use_mb_convert_encoding = false; public $use_mb_convert_encoding = false;
/** /**
* Linefeed
* Line feed characters used by unparse, save, and output methods * Line feed characters used by unparse, save, and output methods
* Popular choices are "\r\n" and "\n".
* *
* @var string * @var string
*/ */
public $linefeed = "\r"; public $linefeed = "\r";
/** /**
* Output Delimiter
* Sets the output delimiter used by the output method * Sets the output delimiter used by the output method
* *
* @var string * @var string
@@ -218,7 +209,6 @@ class Csv {
public $output_delimiter = ','; public $output_delimiter = ',';
/** /**
* Output filename
* Sets the output filename * Sets the output filename
* *
* @var string * @var string
@@ -226,7 +216,6 @@ class Csv {
public $output_filename = 'data.csv'; public $output_filename = 'data.csv';
/** /**
* Keep File Data
* keep raw file data in memory after successful parsing (useful for debugging) * keep raw file data in memory after successful parsing (useful for debugging)
* *
* @var bool * @var bool
@@ -270,7 +259,6 @@ class Csv {
public $error = 0; public $error = 0;
/** /**
* Error Information
* Detailed error information * Detailed error information
* *
* @var array * @var array
@@ -298,17 +286,16 @@ class Csv {
public $titles = array(); public $titles = array();
/** /**
* Data * Two-dimensional array of CSV data.
* Two-dimensional array of CSV data * The first dimension is the line number. Each line is represented as an array with field names as keys.
* *
* @var array * @var array<array>
*/ */
public $data = array(); public $data = array();
use DatatypeTrait; use DatatypeTrait;
/** /**
* Constructor
* Class constructor * Class constructor
* *
* @param string|null $data The CSV string or a direct file path. * @param string|null $data The CSV string or a direct file path.
@@ -368,7 +355,6 @@ class Csv {
// ============================================== // ==============================================
/** /**
* Parse
* Parse a CSV file or string * Parse a CSV file or string
* *
* @param string|null $dataString The CSV string or a direct file path * @param string|null $dataString The CSV string or a direct file path
@@ -415,8 +401,7 @@ class Csv {
} }
/** /**
* Save * Save changes, or write a new file and/or data.
* Save changes, or write a new file and/or data
* *
* @param string $file File location to save to * @param string $file File location to save to
* @param array $data 2D array of data * @param array $data 2D array of data
@@ -440,8 +425,9 @@ class Csv {
} }
/** /**
* Output * Generate a CSV-based string for output.
* Generate a CSV based string for output. *
* Useful for exports in web applications.
* *
* @param string|null $filename If a filename is specified here or in the * @param string|null $filename If a filename is specified here or in the
* object, headers and data will be output * object, headers and data will be output
@@ -485,11 +471,15 @@ class Csv {
} }
/** /**
* Encoding
* Convert character encoding * Convert character encoding
* *
* @param string|null $input Input character encoding, uses default if left blank * Specify the encoding to use for the next parsing or unparsing.
* Calling this function will not change the data held in the object immediately.
*
* @param string|null $input Input character encoding
* If the value null is passed, the existing input encoding remains set (default: ISO-8859-1).
* @param string|null $output Output character encoding, uses default if left blank * @param string|null $output Output character encoding, uses default if left blank
* If the value null is passed, the existing output encoding remains set (default: ISO-8859-1).
* *
* @return void * @return void
*/ */
@@ -505,8 +495,7 @@ class Csv {
} }
/** /**
* Auto * Auto-detect delimiter: Find delimiter by analyzing a specific number of
* Auto-Detect Delimiter: Find delimiter by analyzing a specific number of
* rows to determine most probable delimiter character * rows to determine most probable delimiter character
* *
* @param string|null $file Local CSV file * @param string|null $file Local CSV file
@@ -609,7 +598,6 @@ class Csv {
// ============================================== // ==============================================
/** /**
* Parse File
* Read file to string and call _parse_string() * Read file to string and call _parse_string()
* *
* @param string|null $file Path to a CSV file. * @param string|null $file Path to a CSV file.
@@ -617,23 +605,26 @@ class Csv {
* the path may also contain a protocol: * the path may also contain a protocol:
* https://example.org/some/file.csv * https://example.org/some/file.csv
* *
* @return array|bool * @return array<array>|false
*/ */
public function parseFile($file = null) { public function parseFile($file = null) {
if (is_null($file)) { if (is_null($file)) {
$file = $this->file; $file = $this->file;
} }
if (empty($this->file_data)) { /**
/** * @see self::keep_file_data
* @see self::keep_file_data * Usually, _parse_string will clean this
* Usually, _parse_string will clean this * Instead of leaving stale data for the next parseFile call behind.
* Instead of leaving stale data for the next parseFile call behind. */
*/ if (empty($this->file_data) && !$this->loadFile($file)) {
$this->load_data($file); return false;
} }
return !empty($this->file_data) ? $this->_parse_string() : false; if (empty($this->file_data)) {
return false;
}
return $this->data = $this->_parse_string();
} }
/** /**
@@ -647,7 +638,8 @@ class Csv {
* *
* @param string|null $data CSV data * @param string|null $data CSV data
* *
* @return array|false - 2D array with CSV data, or false on failure * @return array<array>|false
* 2D array with CSV data, or false on failure
*/ */
protected function _parse_string($data = null) { protected function _parse_string($data = null) {
if (empty($data)) { if (empty($data)) {
@@ -1234,14 +1226,14 @@ class Csv {
$file = $this->file; $file = $this->file;
} }
return $this->load_data($file); return $this->loadFile($file);
} }
return true; return true;
} }
/** /**
* Check if passed info might be delimiter * Check if passed info might be delimiter.
* Only used by find_delimiter * Only used by find_delimiter
* *
* @param string $char Potential field separating character * @param string $char Potential field separating character
@@ -1256,7 +1248,7 @@ class Csv {
$first = null; $first = null;
$equal = null; $equal = null;
$almost = false; $almost = false;
foreach ($array as $key => $value) { foreach ($array as $value) {
if ($first == null) { if ($first == null) {
$first = $value; $first = $value;
} elseif ($value == $first && $equal !== false) { } elseif ($value == $first && $equal !== false) {
@@ -1269,7 +1261,7 @@ class Csv {
} }
} }
if ($equal) { if ($equal || $depth === 1) {
$match = $almost ? 2 : 1; $match = $almost ? 2 : 1;
$pref = strpos($preferred, $char); $pref = strpos($preferred, $char);
$pref = ($pref !== false) ? str_pad($pref, 3, '0', STR_PAD_LEFT) : '999'; $pref = ($pref !== false) ? str_pad($pref, 3, '0', STR_PAD_LEFT) : '999';
@@ -1413,7 +1405,7 @@ class Csv {
$is_newline = ($ch == "\n" && $pch != "\r") || $ch == "\r"; $is_newline = ($ch == "\n" && $pch != "\r") || $ch == "\r";
if ($ch == $enclosure) { if ($ch == $enclosure) {
if (!$enclosed || $nch != $enclosure) { if (!$enclosed || $nch != $enclosure) {
$enclosed = $enclosed ? false : true; $enclosed = !$enclosed;
} elseif ($enclosed) { } elseif ($enclosed) {
$i++; $i++;
} }

View File

@@ -0,0 +1 @@
C1,C2,C3
1 C1 C2 C3

View File

@@ -121,6 +121,15 @@ class ParseTest extends TestCase {
self::assertEquals($expected, $this->csv->data); self::assertEquals($expected, $this->csv->data);
} }
public function testSingleRow() {
$this->csv->auto(__DIR__ . '/../example_files/single_row.csv');
self::assertEquals([], $this->csv->data, 'Single row is detected as header');
$this->csv->heading = false;
$this->csv->auto(__DIR__ . '/../example_files/single_row.csv');
$expected = [['C1', 'C2', 'C3']];
self::assertEquals($expected, $this->csv->data);
}
public function testMatomoData() { public function testMatomoData() {
// Matomo (Piwik) export cannot be read with // Matomo (Piwik) export cannot be read with
$this->csv->use_mb_convert_encoding = true; $this->csv->use_mb_convert_encoding = true;
@@ -166,18 +175,22 @@ class ParseTest extends TestCase {
// This also tests if ::load_data removed the BOM from the data; // This also tests if ::load_data removed the BOM from the data;
// otherwise the 'title' column would have 3 extra bytes. // otherwise the 'title' column would have 3 extra bytes.
$this->assertEquals([ $this->assertEquals(
'title', [
'isbn', 'title',
'publishedAt', 'isbn',
], array_keys(reset($this->csv->data))); 'publishedAt',
],
array_keys(reset($this->csv->data)));
$titles = array_column($this->csv->data, 'title'); $titles = array_column($this->csv->data, 'title');
$this->assertEquals([ $this->assertEquals(
'Красивая кулинария', [
'The Wine Connoisseurs', 'Красивая кулинария',
'Weißwein', 'The Wine Connoisseurs',
], $titles); 'Weißwein',
],
$titles);
} }
public function testWithMultipleNewlines() { public function testWithMultipleNewlines() {
@@ -185,18 +198,20 @@ class ParseTest extends TestCase {
$aElse9 = array_column($this->csv->data, 'else9'); $aElse9 = array_column($this->csv->data, 'else9');
/** @noinspection SpellCheckingInspection */ /** @noinspection SpellCheckingInspection */
$this->assertEquals([ $this->assertEquals(
'Abweichung', [
'Abweichung', 'Abweichung',
'Abweichung', 'Abweichung',
'Alt', 'Abweichung',
'Fehlt', 'Alt',
'Neu', 'Fehlt',
'OK', 'Neu',
'Fehlt', 'OK',
'Fehlt', 'Fehlt',
'Fehlt', 'Fehlt',
], $aElse9); 'Fehlt',
],
$aElse9);
} }
/** /**
@@ -298,9 +313,9 @@ class ParseTest extends TestCase {
/** /**
* Call protected/private method of a class. * Call protected/private method of a class.
* *
* @param object $object Instantiated object that we will run method on. * @param object $object Instantiated object that we will run method on.
* @param string $methodName Method name to call * @param string $methodName Method name to call
* @param array $parameters Array of parameters to pass into method. * @param array $parameters Array of parameters to pass into method.
* *
* @return mixed Method return. * @return mixed Method return.
*/ */
@@ -331,4 +346,10 @@ class ParseTest extends TestCase {
self::assertFalse($this->csv->parseFile('')); self::assertFalse($this->csv->parseFile(''));
self::assertFalse($this->csv->parseFile(null)); self::assertFalse($this->csv->parseFile(null));
} }
public function testParseFile() {
$data = $this->csv->parseFile(__DIR__ . '/fixtures/auto-double-enclosure.csv');
self::assertCount(2, $data);
self::assertEquals($data, $this->csv->data);
}
} }