Skip to content

Commit 1c4bd21

Browse files
authored
Merge pull request #4 from clue-labs/multiline
Support parsing multiline CSV values with newlines in cell values
2 parents 51e4306 + 8be0dc2 commit 1c4bd21

File tree

4 files changed

+75
-24
lines changed

4 files changed

+75
-24
lines changed

README.md

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -64,14 +64,15 @@ Carol,40
6464
Dave,30
6565
```
6666

67-
CSV allows handling field values that contain spaces or the delimiting comma
68-
(think of URLs or user-provided descriptions) by enclosing them with quotes like
69-
this:
67+
CSV allows handling field values that contain spaces, the delimiting comma or
68+
even newline characters (think of URLs or user-provided descriptions) by
69+
enclosing them with quotes like this:
7070

7171
```
7272
name,comment
7373
Alice,"Yes, I like cheese"
74-
Bob,"Hello World!"
74+
Bob,"Hello
75+
World!"
7576
```
7677

7778
> Note that these more advanced parsing rules are often handled inconsistently

src/Decoder.php

Lines changed: 29 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -13,14 +13,14 @@
1313
class Decoder extends EventEmitter implements ReadableStreamInterface
1414
{
1515
private $input;
16-
private $temp = false;
1716

1817
private $delimiter;
1918
private $enclosure;
2019
private $escapeChar;
2120
private $maxlength;
2221

2322
private $buffer = '';
23+
private $offset = 0;
2424
private $closed = false;
2525

2626
public function __construct(ReadableStreamInterface $input, $delimiter = ',', $enclosure = '"', $escapeChar = '\\', $maxlength = 65536)
@@ -35,8 +35,6 @@ public function __construct(ReadableStreamInterface $input, $delimiter = ',', $e
3535
return $this->close();
3636
}
3737

38-
$this->temp = fopen('php://memory', 'r+');
39-
4038
$this->input->on('data', array($this, 'handleData'));
4139
$this->input->on('end', array($this, 'handleEnd'));
4240
$this->input->on('error', array($this, 'handleError'));
@@ -56,12 +54,6 @@ public function close()
5654

5755
$this->closed = true;
5856
$this->buffer = '';
59-
60-
if ($this->temp !== false) {
61-
fclose($this->temp);
62-
$this->temp = false;
63-
}
64-
6557
$this->input->close();
6658

6759
$this->emit('close');
@@ -91,21 +83,34 @@ public function handleData($data)
9183
$this->buffer .= $data;
9284

9385
// keep parsing while a newline has been found
94-
while (($newline = strpos($this->buffer, "\n")) !== false && $newline <= $this->maxlength) {
95-
// read data up until newline and remove from buffer
96-
ftruncate($this->temp, 0);
97-
fwrite($this->temp, (string)substr($this->buffer, 0, $newline));
98-
rewind($this->temp);
99-
$this->buffer = (string)substr($this->buffer, $newline + 1);
100-
101-
$data = fgetcsv($this->temp, 0, $this->delimiter, $this->enclosure, $this->escapeChar);
102-
103-
// abort stream if decoding failed
104-
if ($data === false) {
86+
while (($newline = \strpos($this->buffer, "\n", $this->offset)) !== false && $newline <= $this->maxlength) {
87+
// read data up until newline and try to parse
88+
$data = \str_getcsv(
89+
\substr($this->buffer, 0, $newline + 1),
90+
$this->delimiter,
91+
$this->enclosure,
92+
$this->escapeChar
93+
);
94+
95+
// unable to decode? abort
96+
if ($data === false || \end($data) === null) {
10597
$this->handleError(new \RuntimeException('Unable to decode CSV'));
10698
return;
10799
}
108100

101+
// the last parsed cell value ends with a newline and the buffer does not end with end quote?
102+
// this looks like a multiline value, so only remember offset and wait for next newline
103+
$last = \substr(\end($data), -1);
104+
\reset($data);
105+
if ($last === "\n" && ($newline === 1 || $this->buffer[$newline - 1] !== $this->enclosure)) {
106+
$this->offset = $newline + 1;
107+
continue;
108+
}
109+
110+
// parsing successful => remove from buffer and emit
111+
$this->buffer = (string)\substr($this->buffer, $newline + 1);
112+
$this->offset = 0;
113+
109114
$this->emit('data', array($data));
110115
}
111116

@@ -121,6 +126,10 @@ public function handleEnd()
121126
$this->handleData("\n");
122127
}
123128

129+
if ($this->buffer !== '') {
130+
$this->handleError(new \RuntimeException('Unable to decode CSV'));
131+
}
132+
124133
if (!$this->closed) {
125134
$this->emit('end');
126135
$this->close();

tests/DecoderTest.php

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,34 @@ public function testEmitDataStringWillForward()
6060
$this->input->emit('data', array("\"hello\"\n"));
6161
}
6262

63+
public function testEmitDataStringWithNewlineWillForward()
64+
{
65+
$this->decoder->on('data', $this->expectCallableOnceWith(array("hello" . "\n" . "world")));
66+
67+
$this->input->emit('data', array("\"hello\nworld\"\n"));
68+
}
69+
70+
public function testEmitDataStringWithMultiNewlineWillForward()
71+
{
72+
$this->decoder->on('data', $this->expectCallableOnceWith(array("hello" . "\n\n" . "world")));
73+
74+
$this->input->emit('data', array("\"hello\n\nworld\"\n"));
75+
}
76+
77+
public function testEmitDataStringEndsWithNewlineWillForward()
78+
{
79+
$this->decoder->on('data', $this->expectCallableOnceWith(array("hello" . "\n")));
80+
81+
$this->input->emit('data', array("\"hello\n\"\n"));
82+
}
83+
84+
public function testEmitDataStringOnlyNewlineWillForward()
85+
{
86+
$this->decoder->on('data', $this->expectCallableOnceWith(array("\n")));
87+
88+
$this->input->emit('data', array("\"\n\"\n"));
89+
}
90+
6391
public function testEmitDataWithoutNewlineWillNotForward()
6492
{
6593
$this->decoder->on('data', $this->expectCallableNever());
@@ -90,6 +118,7 @@ public function testEmitDataErrorWillForwardError()
90118
$this->decoder->on('error', $this->expectCallableOnce());
91119

92120
$this->input->emit('data', array("\"hello\\\"test\n"));
121+
$this->input->emit('end');
93122
}
94123

95124
public function testEmitDataErrorInMultipleChunksWillForwardError()
@@ -99,6 +128,7 @@ public function testEmitDataErrorInMultipleChunksWillForwardError()
99128

100129
$this->input->emit('data', array("\"hello"));
101130
$this->input->emit('data', array("\\\"test\n"));
131+
$this->input->emit('end');
102132
}
103133

104134
public function testEmitDataWithExactBufferSizeWillForward()

tests/EncoderTest.php

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,17 @@ public function testWriteArrayWithStringWithSpacesUsesEnclosing()
6262
$this->encoder->write(array('hello world'));
6363
}
6464

65+
public function testWriteArrayWithStringWithNewlineUsesEnclosing()
66+
{
67+
$this->output = $this->getMockBuilder('React\Stream\WritableStreamInterface')->getMock();
68+
$this->output->expects($this->once())->method('isWritable')->willReturn(true);
69+
$this->encoder = new Encoder($this->output);
70+
71+
$this->output->expects($this->once())->method('write')->with("\"hello\nworld\"\n");
72+
73+
$this->encoder->write(array("hello\nworld"));
74+
}
75+
6576
public function testWriteArrayWithSpecialStringUsesEnclosing()
6677
{
6778
$this->output = $this->getMockBuilder('React\Stream\WritableStreamInterface')->getMock();

0 commit comments

Comments
 (0)