From 6bbd4b02f4c2ae28e098394e635455af3fd4d4c0 Mon Sep 17 00:00:00 2001 From: "Stephen S. Musoke" Date: Tue, 15 May 2018 15:16:03 +0300 Subject: [PATCH 1/5] #29 Enable MacOS Line breaks as acceptable --- src/CsvFile.php | 4 ++-- tests/CsvFileTest.php | 19 +------------------ 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/src/CsvFile.php b/src/CsvFile.php index ee9892e..d0a0ecc 100644 --- a/src/CsvFile.php +++ b/src/CsvFile.php @@ -405,12 +405,12 @@ public function validateLineBreak() Exception::INVALID_PARAM_STR ); } - if (in_array($lineBreak, ["\r\n", "\n"])) { + if (in_array($lineBreak, ["\r\n", "\n", "\r"])) { return $lineBreak; } throw new InvalidArgumentException( - "Invalid line break. Please use unix \\n or win \\r\\n line breaks.", + "Invalid line break. Please use unix \\n or win \\r\\n or Mac \\r line breaks.", Exception::INVALID_PARAM, null, Exception::INVALID_PARAM_STR diff --git a/tests/CsvFileTest.php b/tests/CsvFileTest.php index 050dab3..7fec707 100644 --- a/tests/CsvFileTest.php +++ b/tests/CsvFileTest.php @@ -172,24 +172,7 @@ public function validLineBreaksData() ['test-input.win.csv', "\r\n", '\r\n'], ['escaping.csv', "\n", '\n'], ['just-header.csv', "\n", '\n'], // default - ]; - } - - /** - * @expectedException \Keboola\Csv\InvalidArgumentException - * @dataProvider invalidLineBreaksData - * @param string $file - */ - public function testInvalidLineBreak($file) - { - $csvFile = new CsvFile(__DIR__ . '/data/' . $file); - $csvFile->validateLineBreak(); - } - - public function invalidLineBreaksData() - { - return [ - ['test-input.mac.csv'], + ['test-input.mac.csv', "\r", '\r'] ]; } From 6214084b870dc0bbec20ca273267926ad94ce77f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Halam=C3=AD=C4=8Dek?= Date: Fri, 18 May 2018 10:16:30 +0200 Subject: [PATCH 2/5] mac lines parsing failing test --- tests/CsvFileTest.php | 2 +- tests/data/escaping.mac.csv | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 tests/data/escaping.mac.csv diff --git a/tests/CsvFileTest.php b/tests/CsvFileTest.php index 7fec707..2f4253a 100644 --- a/tests/CsvFileTest.php +++ b/tests/CsvFileTest.php @@ -65,7 +65,7 @@ public function validCsvFiles() public function testParse() { - $csvFile = new CsvFile(__DIR__ . '/data/escaping.csv', ",", '"'); + $csvFile = new CsvFile(__DIR__ . '/data/escaping.mac.csv', ",", '"'); $rows = []; foreach ($csvFile as $row) { diff --git a/tests/data/escaping.mac.csv b/tests/data/escaping.mac.csv new file mode 100644 index 0000000..c0d1df5 --- /dev/null +++ b/tests/data/escaping.mac.csv @@ -0,0 +1 @@ +col1,col2 line without enclosure,second column "enclosure "" in column","hello \" "line with enclosure","second column" "column with enclosure "", and comma inside text","second column enclosure in text """ "columns with new line","columns with tab" "Columns with WINDOWS new line", "second" "column with \n \t \\","second col" \ No newline at end of file From 31df913dc10f17f6f92556700f831b6f4c0fd4be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Halam=C3=AD=C4=8Dek?= Date: Fri, 18 May 2018 10:29:53 +0200 Subject: [PATCH 3/5] line breaks tests --- tests/CsvFileTest.php | 23 +++++++++++++++++++++-- tests/data/escaping.mac.csv | 3 ++- tests/data/escaping.win.csv | 10 ++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 tests/data/escaping.win.csv diff --git a/tests/CsvFileTest.php b/tests/CsvFileTest.php index 2f4253a..30bf721 100644 --- a/tests/CsvFileTest.php +++ b/tests/CsvFileTest.php @@ -63,9 +63,13 @@ public function validCsvFiles() ]; } - public function testParse() + /** + * @dataProvider testParseProvider + * @throws \Keboola\Csv\InvalidArgumentException + */ + public function testParse($inputFilePath) { - $csvFile = new CsvFile(__DIR__ . '/data/escaping.mac.csv', ",", '"'); + $csvFile = new CsvFile($inputFilePath, ",", '"'); $rows = []; foreach ($csvFile as $row) { @@ -102,6 +106,21 @@ public function testParse() self::assertEquals($expected, $rows); } + public function testParseProvider() + { + return [ + 'linux' => [ + __DIR__ . '/data/escaping.csv', + ], + 'mac' => [ + __DIR__ . '/data/escaping.mac.csv', + ], + 'win' => [ + __DIR__ . '/data/escaping.win.csv' + ], + ]; + } + public function testParseEscapedBy() { $csvFile = new CsvFile(__DIR__ . '/data/escapingEscapedBy.csv', ",", '"', '\\'); diff --git a/tests/data/escaping.mac.csv b/tests/data/escaping.mac.csv index c0d1df5..9dda005 100644 --- a/tests/data/escaping.mac.csv +++ b/tests/data/escaping.mac.csv @@ -1 +1,2 @@ -col1,col2 line without enclosure,second column "enclosure "" in column","hello \" "line with enclosure","second column" "column with enclosure "", and comma inside text","second column enclosure in text """ "columns with new line","columns with tab" "Columns with WINDOWS new line", "second" "column with \n \t \\","second col" \ No newline at end of file +col1,col2 line without enclosure,second column "enclosure "" in column","hello \" "line with enclosure","second column" "column with enclosure "", and comma inside text","second column enclosure in text """ "columns with new line","columns with tab" "Columns with WINDOWS +new line", "second" "column with \n \t \\","second col" diff --git a/tests/data/escaping.win.csv b/tests/data/escaping.win.csv new file mode 100644 index 0000000..0db477b --- /dev/null +++ b/tests/data/escaping.win.csv @@ -0,0 +1,10 @@ +col1,col2 +line without enclosure,second column +"enclosure "" in column","hello \" +"line with enclosure","second column" +"column with enclosure "", and comma inside text","second column enclosure in text """ +"columns with +new line","columns with tab" +"Columns with WINDOWS +new line", "second" +"column with \n \t \\","second col" From 2792ba8e9127cadfaf42d1f8b7eda133dfc04ea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Halam=C3=AD=C4=8Dek?= Date: Fri, 18 May 2018 10:44:48 +0200 Subject: [PATCH 4/5] valid mac line breaks test data --- tests/data/escaping.mac.csv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/data/escaping.mac.csv b/tests/data/escaping.mac.csv index 9dda005..13a926e 100644 --- a/tests/data/escaping.mac.csv +++ b/tests/data/escaping.mac.csv @@ -1,2 +1,3 @@ -col1,col2 line without enclosure,second column "enclosure "" in column","hello \" "line with enclosure","second column" "column with enclosure "", and comma inside text","second column enclosure in text """ "columns with new line","columns with tab" "Columns with WINDOWS -new line", "second" "column with \n \t \\","second col" +col1,col2 line without enclosure,second column "enclosure "" in column","hello \" "line with enclosure","second column" "column with enclosure "", and comma inside text","second column enclosure in text """ "columns with +new line","columns with tab" "Columns with WINDOWS +new line", "second" "column with \n \t \\","second col" From 5b068d3a181e6172dba0dea018aad4de8b2a3fa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Halam=C3=AD=C4=8Dek?= Date: Fri, 18 May 2018 10:54:07 +0200 Subject: [PATCH 5/5] mac line breaks poc fix --- src/CsvFile.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CsvFile.php b/src/CsvFile.php index d0a0ecc..c7fae1a 100644 --- a/src/CsvFile.php +++ b/src/CsvFile.php @@ -210,6 +210,7 @@ protected function detectLineBreak() protected function readLine() { $this->validateLineBreak(); + ini_set('auto_detect_line_endings', '1'); // allow empty enclosure hack $enclosure = !$this->getEnclosure() ? chr(0) : $this->getEnclosure();