1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 package csv
53
54 import (
55 "bufio"
56 "bytes"
57 "errors"
58 "fmt"
59 "io"
60 "unicode"
61 "unicode/utf8"
62 )
63
64
65
// ParseError reports a problem found while parsing a record. It pairs the
// underlying error with the location in the input where it was detected.
type ParseError struct {
	// StartLine is the line on which the offending record begins.
	StartLine int

	// Line is the line on which the error was detected. It can differ from
	// StartLine when a quoted field spans several lines.
	Line int

	// Column is the 1-based column, counted in bytes, at which the error
	// was detected.
	Column int

	// Err is the underlying error, for example ErrBareQuote, ErrQuote or
	// ErrFieldCount.
	Err error
}
72
73 func (e *ParseError) Error() string {
74 if e.Err == ErrFieldCount {
75 return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
76 }
77 if e.StartLine != e.Line {
78 return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
79 }
80 return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
81 }
82
83 func (e *ParseError) Unwrap() error { return e.Err }
84
85
// Sentinel errors that can appear in ParseError.Err.
var (
	// ErrBareQuote is reported when a double quote appears inside a field
	// that does not start with a quote and LazyQuotes is off.
	ErrBareQuote = errors.New("bare \" in non-quoted-field")

	// ErrQuote is reported when, with LazyQuotes off, a quote inside a
	// quoted field is followed by something other than another quote, the
	// delimiter or the end of the line, or when the input ends before the
	// quoted field is closed.
	ErrQuote = errors.New("extraneous or missing \" in quoted-field")

	// ErrFieldCount is reported when a record does not have the number of
	// fields required by Reader.FieldsPerRecord.
	ErrFieldCount = errors.New("wrong number of fields")

	// ErrTrailingComma is not produced by any code in this file.
	// NOTE(review): presumably kept so existing callers that reference it
	// still compile — confirm before removing.
	ErrTrailingComma = errors.New("extra delimiter at end of line")
)
94
// errInvalidDelim is returned by readRecord, before any input is consumed,
// when Comma or Comment cannot be used as a delimiter or when the two are
// equal.
var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
96
// validDelim reports whether r may serve as a field or comment delimiter.
// Any valid rune qualifies except NUL, the double quote, carriage return,
// line feed and U+FFFD (utf8.RuneError).
func validDelim(r rune) bool {
	switch r {
	case 0, '"', '\r', '\n', utf8.RuneError:
		return false
	}
	return utf8.ValidRune(r)
}
100
101
102
103
104
105
106
107
108
109
110 type Reader struct {
111
112
113
114
115 Comma rune
116
117
118
119
120
121
122
123
124 Comment rune
125
126
127
128
129
130
131
132 FieldsPerRecord int
133
134
135
136 LazyQuotes bool
137
138
139
140 TrimLeadingSpace bool
141
142
143
144
145 ReuseRecord bool
146
147
148 TrailingComma bool
149
150 r *bufio.Reader
151
152
153 numLine int
154
155
156 offset int64
157
158
159 rawBuffer []byte
160
161
162
163
164
165 recordBuffer []byte
166
167
168
169 fieldIndexes []int
170
171
172
173 fieldPositions []position
174
175
176 lastRecord []string
177 }
178
179
180 func NewReader(r io.Reader) *Reader {
181 return &Reader{
182 Comma: ',',
183 r: bufio.NewReader(r),
184 }
185 }
186
187
188
189
190
191
192
193
194
195 func (r *Reader) Read() (record []string, err error) {
196 if r.ReuseRecord {
197 record, err = r.readRecord(r.lastRecord)
198 r.lastRecord = record
199 } else {
200 record, err = r.readRecord(nil)
201 }
202 return record, err
203 }
204
205
206
207
208
209
210
211 func (r *Reader) FieldPos(field int) (line, column int) {
212 if field < 0 || field >= len(r.fieldPositions) {
213 panic("out of range index passed to FieldPos")
214 }
215 p := &r.fieldPositions[field]
216 return p.line, p.col
217 }
218
219
220
221
222 func (r *Reader) InputOffset() int64 {
223 return r.offset
224 }
225
226
// position identifies a place in the input by line and column. The Reader
// numbers both from 1 and counts columns in bytes.
type position struct {
	line int
	col  int
}
230
231
232
233
234
235
236 func (r *Reader) ReadAll() (records [][]string, err error) {
237 for {
238 record, err := r.readRecord(nil)
239 if err == io.EOF {
240 return records, nil
241 }
242 if err != nil {
243 return nil, err
244 }
245 records = append(records, record)
246 }
247 }
248
249
250
251
252
253 func (r *Reader) readLine() ([]byte, error) {
254 line, err := r.r.ReadSlice('\n')
255 if err == bufio.ErrBufferFull {
256 r.rawBuffer = append(r.rawBuffer[:0], line...)
257 for err == bufio.ErrBufferFull {
258 line, err = r.r.ReadSlice('\n')
259 r.rawBuffer = append(r.rawBuffer, line...)
260 }
261 line = r.rawBuffer
262 }
263 readSize := len(line)
264 if readSize > 0 && err == io.EOF {
265 err = nil
266
267 if line[readSize-1] == '\r' {
268 line = line[:readSize-1]
269 }
270 }
271 r.numLine++
272 r.offset += int64(readSize)
273
274 if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
275 line[n-2] = '\n'
276 line = line[:n-1]
277 }
278 return line, err
279 }
280
281
// lengthNL reports how many bytes a trailing newline takes up in b:
// 1 if b ends in '\n', 0 otherwise (including when b is empty).
func lengthNL(b []byte) int {
	if n := len(b); n == 0 || b[n-1] != '\n' {
		return 0
	}
	return 1
}
288
289
// nextRune returns the first rune encoded in b. As with utf8.DecodeRune,
// the result is utf8.RuneError when b is empty or does not start with a
// valid UTF-8 sequence.
func nextRune(b []byte) rune {
	first, _ := utf8.DecodeRune(b)
	return first
}
294
295 func (r *Reader) readRecord(dst []string) ([]string, error) {
296 if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
297 return nil, errInvalidDelim
298 }
299
300
301 var line []byte
302 var errRead error
303 for errRead == nil {
304 line, errRead = r.readLine()
305 if r.Comment != 0 && nextRune(line) == r.Comment {
306 line = nil
307 continue
308 }
309 if errRead == nil && len(line) == lengthNL(line) {
310 line = nil
311 continue
312 }
313 break
314 }
315 if errRead == io.EOF {
316 return nil, errRead
317 }
318
319
320 var err error
321 const quoteLen = len(`"`)
322 commaLen := utf8.RuneLen(r.Comma)
323 recLine := r.numLine
324 r.recordBuffer = r.recordBuffer[:0]
325 r.fieldIndexes = r.fieldIndexes[:0]
326 r.fieldPositions = r.fieldPositions[:0]
327 pos := position{line: r.numLine, col: 1}
328 parseField:
329 for {
330 if r.TrimLeadingSpace {
331 i := bytes.IndexFunc(line, func(r rune) bool {
332 return !unicode.IsSpace(r)
333 })
334 if i < 0 {
335 i = len(line)
336 pos.col -= lengthNL(line)
337 }
338 line = line[i:]
339 pos.col += i
340 }
341 if len(line) == 0 || line[0] != '"' {
342
343 i := bytes.IndexRune(line, r.Comma)
344 field := line
345 if i >= 0 {
346 field = field[:i]
347 } else {
348 field = field[:len(field)-lengthNL(field)]
349 }
350
351 if !r.LazyQuotes {
352 if j := bytes.IndexByte(field, '"'); j >= 0 {
353 col := pos.col + j
354 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
355 break parseField
356 }
357 }
358 r.recordBuffer = append(r.recordBuffer, field...)
359 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
360 r.fieldPositions = append(r.fieldPositions, pos)
361 if i >= 0 {
362 line = line[i+commaLen:]
363 pos.col += i + commaLen
364 continue parseField
365 }
366 break parseField
367 } else {
368
369 fieldPos := pos
370 line = line[quoteLen:]
371 pos.col += quoteLen
372 for {
373 i := bytes.IndexByte(line, '"')
374 if i >= 0 {
375
376 r.recordBuffer = append(r.recordBuffer, line[:i]...)
377 line = line[i+quoteLen:]
378 pos.col += i + quoteLen
379 switch rn := nextRune(line); {
380 case rn == '"':
381
382 r.recordBuffer = append(r.recordBuffer, '"')
383 line = line[quoteLen:]
384 pos.col += quoteLen
385 case rn == r.Comma:
386
387 line = line[commaLen:]
388 pos.col += commaLen
389 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
390 r.fieldPositions = append(r.fieldPositions, fieldPos)
391 continue parseField
392 case lengthNL(line) == len(line):
393
394 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
395 r.fieldPositions = append(r.fieldPositions, fieldPos)
396 break parseField
397 case r.LazyQuotes:
398
399 r.recordBuffer = append(r.recordBuffer, '"')
400 default:
401
402 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: pos.col - quoteLen, Err: ErrQuote}
403 break parseField
404 }
405 } else if len(line) > 0 {
406
407 r.recordBuffer = append(r.recordBuffer, line...)
408 if errRead != nil {
409 break parseField
410 }
411 pos.col += len(line)
412 line, errRead = r.readLine()
413 if len(line) > 0 {
414 pos.line++
415 pos.col = 1
416 }
417 if errRead == io.EOF {
418 errRead = nil
419 }
420 } else {
421
422 if !r.LazyQuotes && errRead == nil {
423 err = &ParseError{StartLine: recLine, Line: pos.line, Column: pos.col, Err: ErrQuote}
424 break parseField
425 }
426 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
427 r.fieldPositions = append(r.fieldPositions, fieldPos)
428 break parseField
429 }
430 }
431 }
432 }
433 if err == nil {
434 err = errRead
435 }
436
437
438
439 str := string(r.recordBuffer)
440 dst = dst[:0]
441 if cap(dst) < len(r.fieldIndexes) {
442 dst = make([]string, len(r.fieldIndexes))
443 }
444 dst = dst[:len(r.fieldIndexes)]
445 var preIdx int
446 for i, idx := range r.fieldIndexes {
447 dst[i] = str[preIdx:idx]
448 preIdx = idx
449 }
450
451
452 if r.FieldsPerRecord > 0 {
453 if len(dst) != r.FieldsPerRecord && err == nil {
454 err = &ParseError{
455 StartLine: recLine,
456 Line: recLine,
457 Column: 1,
458 Err: ErrFieldCount,
459 }
460 }
461 } else if r.FieldsPerRecord == 0 {
462 r.FieldsPerRecord = len(dst)
463 }
464 return dst, err
465 }
466
View as plain text