std/parsecsv

  Source   Edit

This module implements a simple, high-performance CSV (comma-separated values) parser.

Basic usage

import std/parsecsv
from std/os import paramStr
from std/streams import newFileStream

var s = newFileStream(paramStr(1), fmRead)
if s == nil:
  quit("cannot open the file" & paramStr(1))

var x: CsvParser
open(x, s, paramStr(1))
while readRow(x):
  echo "new row: "
  for val in items(x.row):
    echo "##", val, "##"
close(x)

For CSV files with a header row, the header can be read and then used as a reference for item access with rowEntry:

import std/parsecsv

# Prepare a file
let content = """One,Two,Three,Four
1,2,3,4
10,20,30,40
100,200,300,400
"""
writeFile("temp.csv", content)

var p: CsvParser
p.open("temp.csv")
p.readHeaderRow()
while p.readRow():
  echo "new row: "
  for col in items(p.headers):
    echo "##", col, ":", p.rowEntry(col), "##"
p.close()

See also

Types

CsvRow = seq[string]
A row in a CSV file.   Source   Edit
CsvParser = object of BaseLexer
  row*: CsvRow
  filename: string
  sep, quote, esc: char
  skipWhite: bool
  currRow: int
  headers*: seq[string]

The parser object.

It consists of two public fields:

  • row is the current row
  • headers are the columns that are defined in the CSV file (read using readHeaderRow and used with rowEntry).
  Source   Edit
CsvError = object of IOError
An exception that is raised if a parsing error occurs.   Source   Edit

Procs

proc open(self: var CsvParser; input: Stream; filename: string; separator = ',';
          quote = '\"'; escape = '\x00'; skipInitialSpace = false) {.
    ...raises: [IOError, OSError], tags: [ReadIOEffect].}
Initializes the parser with an input stream. Filename is only used for nice error messages. The parser's behaviour can be controlled by the following optional parameters:
  • separator: character used to separate fields
  • quote: Used to quote fields containing special characters like separator, quote or new-line characters. '\0' disables the parsing of quotes.
  • escape: removes any special meaning from the following character; '\0' disables escaping; if escaping is disabled and quote is not '\0', two consecutive quote characters are parsed as one literal quote character.
  • skipInitialSpace: If true, whitespace immediately following the separator is ignored.

See also:

  • open proc which creates the file stream for you

Example:

import std/streams
var strm = newStringStream("One,Two,Three\n1,2,3\n10,20,30")
var parser: CsvParser
parser.open(strm, "tmp.csv")
parser.close()
strm.close()
  Source   Edit
proc open(self: var CsvParser; filename: string; separator = ','; quote = '\"';
          escape = '\x00'; skipInitialSpace = false) {.
    ...raises: [CsvError, IOError, OSError], tags: [ReadIOEffect].}
Similar to the other open proc, but creates the file stream for you.

Example:

from std/os import removeFile
writeFile("tmp.csv", "One,Two,Three\n1,2,3\n10,20,300")
var parser: CsvParser
parser.open("tmp.csv")
parser.close()
removeFile("tmp.csv")
  Source   Edit
proc processedRows(self: var CsvParser): int {.inline, ...raises: [], tags: [].}

Returns the number of processed rows.

Note that the counter is incremented on every call to readRow, even when readRow has already reached EOF and returns false.

Example:

import std/streams

var strm = newStringStream("One,Two,Three\n1,2,3")
var parser: CsvParser
parser.open(strm, "tmp.csv")
doAssert parser.readRow()
doAssert parser.processedRows() == 1
doAssert parser.readRow()
doAssert parser.processedRows() == 2
## Even if `readRow` arrived at EOF then `processedRows` is incremented.
doAssert parser.readRow() == false
doAssert parser.processedRows() == 3
doAssert parser.readRow() == false
doAssert parser.processedRows() == 4
parser.close()
strm.close()
  Source   Edit
proc readRow(self: var CsvParser; columns = 0): bool {.
    ...raises: [IOError, OSError, CsvError], tags: [ReadIOEffect].}

Reads the next row; if columns > 0, it expects the row to have exactly this many columns. Returns false if the end of the file has been encountered, otherwise true.

Blank lines are skipped.

Example:

import std/streams
var strm = newStringStream("One,Two,Three\n1,2,3\n\n10,20,30")
var parser: CsvParser
parser.open(strm, "tmp.csv")
doAssert parser.readRow()
doAssert parser.row == @["One", "Two", "Three"]
doAssert parser.readRow()
doAssert parser.row == @["1", "2", "3"]
## Blank lines are skipped.
doAssert parser.readRow()
doAssert parser.row == @["10", "20", "30"]

var emptySeq: seq[string]
doAssert parser.readRow() == false
doAssert parser.row == emptySeq
doAssert parser.readRow() == false
doAssert parser.row == emptySeq

parser.close()
strm.close()
  Source   Edit
proc close(self: var CsvParser) {.inline, ...raises: [Exception, IOError, OSError],
                                  tags: [WriteIOEffect].}
Closes the parser self and its associated input stream.   Source   Edit
proc readHeaderRow(self: var CsvParser) {....raises: [IOError, OSError, CsvError],
    tags: [ReadIOEffect].}
Reads the first row and creates a look-up table for column numbers. See also: rowEntry proc.

Example:

import std/streams

var strm = newStringStream("One,Two,Three\n1,2,3")
var parser: CsvParser
parser.open(strm, "tmp.csv")

parser.readHeaderRow()
doAssert parser.headers == @["One", "Two", "Three"]
doAssert parser.row == @["One", "Two", "Three"]

doAssert parser.readRow()
doAssert parser.headers == @["One", "Two", "Three"]
doAssert parser.row == @["1", "2", "3"]

parser.close()
strm.close()
  Source   Edit
proc rowEntry(self: var CsvParser; entry: string): var string {.
    ...raises: [KeyError], tags: [].}

Accesses a specified entry from the current row.

Assumes that readHeaderRow has already been called.

If the specified entry does not exist, raises KeyError.

Example:

import std/streams
var strm = newStringStream("One,Two,Three\n1,2,3\n\n10,20,30")
var parser: CsvParser
parser.open(strm, "tmp.csv")
## Requires calling `readHeaderRow`.
parser.readHeaderRow()
doAssert parser.readRow()
doAssert parser.rowEntry("One") == "1"
doAssert parser.rowEntry("Two") == "2"
doAssert parser.rowEntry("Three") == "3"
doAssertRaises(KeyError):
  discard parser.rowEntry("NonexistentEntry")
parser.close()
strm.close()
  Source   Edit