std/nre2

Search:
Source   Edit  

What is NRE2?

A regular expression library for Nim that replaces the deprecated NRE. It is implemented on top of Regex, a pure-Nim regex engine that guarantees linear-time matching. It supports compiling and matching regular expressions at compile time and works with the JS backend.

NRE2 is mostly compatible with NRE, and its regular expression syntax is similar to PCRE. However, it lacks a few features, and the way options are set in a pattern is different.

The regular expression syntax is explained at https://nitely.github.io/nim-regex/regex.html

Example:

import std/nre2
import std/sugar
let vowels = re"[aeoui]"
let bounds = collect:
  for match in "moiga".findIter(vowels): match.matchBounds
assert bounds == @[1 .. 1, 2 .. 2, 4 .. 4]
from std/sequtils import toSeq
let s = sequtils.toSeq("moiga".findIter(vowels))
  # fully qualified to avoid confusion with nre2.toSeq
assert s.len == 3

let firstVowel = "foo".find(vowels)
let hasVowel = firstVowel.isSome()
assert hasVowel
let matchBounds = firstVowel.get().captureBounds[-1]
assert matchBounds.a == 1

# as with module `re`, unless specified otherwise, `start` parameter in each
# proc indicates where the scan starts, but outputs are relative to the start
# of the input string, not to `start`:
assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
assert find("uxabc", re"ab", start = 3).isNone

Types

CaptureBounds {.borrow: `.`.} = distinct RegexMatch
Source   Edit  
Captures {.borrow: `.`.} = distinct RegexMatch
Source   Edit  
Regex = regex.Regex2
Represents the pattern that things are matched against, constructed with re(string). Examples: re"foo", re(r"(?x)foo #comment")
captureCount: int
the number of captures that the pattern has.
captureNameId: Table[string, int]
a table from the capture names to their numeric id.

The regular expression syntax supported by Regex is explained at https://nitely.github.io/nim-regex/regex.html

Source   Edit  
RegexMatch = object
  str*: string               ## The string that was matched against.
Usually seen as Option[RegexMatch], it represents the result of an execution. On failure it is none; on success it is some.
str: string
the string that was matched against
captures[]: string
the string value of whatever was captured at that id. If the value is invalid, then behavior is undefined. If the id is -1, then the whole match is returned. If the given capture was not matched, nil is returned. See examples for match.
captureBounds[]: HSlice[int, int]
gets the bounds of the given capture according to the same rules as the above. If the capture is not filled, then None is returned. The bounds are both inclusive. See examples for match.
match: string
the full text of the match.
matchBounds: HSlice[int, int]
the bounds of the match, as in captureBounds[]
(captureBounds|captures).toTable
returns a table with each named capture as a key.
(captureBounds|captures).toSeq
returns all the captures by their number.
$: string
same as match
Source   Edit  

Procs

func `$`(match: RegexMatch): string {....raises: [], tags: [], forbids: [].}
Source   Edit  
func `[]`(match: CaptureBounds; i: int): HSlice[int, int] {.inline, ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
func `[]`(match: CaptureBounds; name: string): HSlice[int, int] {.inline,
    ...raises: [KeyError], tags: [], forbids: [].}
Source   Edit  
func `[]`(match: Captures; i: int): string {.inline, ...raises: [], tags: [],
    forbids: [].}
Source   Edit  
func `[]`(match: Captures; name: string): string {.inline, ...raises: [KeyError],
    tags: [], forbids: [].}
Source   Edit  
func captureBounds(match: RegexMatch): CaptureBounds {.inline, ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
func captureCount(pattern: Regex): int {.inline, ...raises: [], tags: [],
    forbids: [].}
Source   Edit  
func captureNameId(pattern: Regex): Table[string, int] {....raises: [], tags: [],
    forbids: [].}
Source   Edit  
func captures(match: RegexMatch): Captures {.inline, ...raises: [], tags: [],
    forbids: [].}
Source   Edit  
func contains(match: CaptureBounds or Captures; name: string): bool {.inline.}
Source   Edit  
func contains(match: Captures or CaptureBounds; i: int): bool {.inline.}
Source   Edit  
proc contains(str: string; pattern: Regex; start = 0; endpos = int.high): bool {.
    ...raises: [], tags: [RootEffect], forbids: [].}
Determines whether the string contains the given pattern between the start and end positions. This function is equivalent to isSome(str.find(pattern, start, endpos)).

Example:

assert "abc".contains(re"bc")
assert not "abc".contains(re"cd")
assert not "abc".contains(re"a", start = 1)
Source   Edit  
func escapeRe(str: string): string {....raises: [], tags: [], forbids: [].}
Escapes the string so that any special regex characters in it are matched literally.

Example:

assert escapeRe("fly+wind") == "fly\\+wind"
assert escapeRe("nim*") == "nim\\*"
Source   Edit  
proc find(str: string; pattern: Regex; start = 0; endpos = int.high): Option[
    RegexMatch] {....raises: [], tags: [RootEffect], forbids: [].}
Finds the given pattern in the string between the start and end positions.
start
The start point at which to start matching. |abc is 0; a|bc is 1
endpos
The maximum index for a match; int.high means the end of the string, otherwise it’s an inclusive upper bound.
Source   Edit  
proc findAll(str: string; pattern: Regex; start = 0; endpos = int.high): seq[
    string] {....raises: [], tags: [RootEffect], forbids: [].}
Source   Edit  
func len(match: Captures or CaptureBounds): int {.inline.}
Returns the number of capturing groups.
Source   Edit  
func match(match: RegexMatch): string {.inline, ...raises: [], tags: [],
                                        forbids: [].}
Source   Edit  
func match(str: string; pattern: Regex; start = 0; endpos = int.high): Option[
    RegexMatch] {....raises: [], tags: [RootEffect], forbids: [].}
Like find(...), but anchored to the start of the string.

Example:

assert "foo".match(re"f").isSome
assert "foo".match(re"o").isNone

assert "abc".match(re"(\w)").get.captures[0] == "a"
assert "abc".match(re"(?P<letter>\w)").get.captures["letter"] == "a"
assert "abc".match(re"(\w)\w").get.captures[-1] == "ab"

assert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
assert 0 in "abc".match(re"(\w)").get.captureBounds
assert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
assert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
Source   Edit  
func matchBounds(match: RegexMatch): HSlice[int, int] {.inline, ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
func re(pattern: static string; flags: static RegexFlags = {}): static[Regex2]
Parses and compiles a regular expression at compile time.
Source   Edit  
func re(pattern: string; flags: RegexFlags = {}): Regex {....raises: [RegexError],
    tags: [], forbids: [].}
Parses and compiles a regular expression at run time.
Source   Edit  
proc replace(str: string; pattern: Regex; sub: string): string {.
    ...raises: [ValueError], tags: [RootEffect], forbids: [].}
Source   Edit  
proc replace(str: string; pattern: Regex;
             subproc: proc (match: RegexMatch): string): string {.
    ...raises: [Exception], tags: [RootEffect], forbids: [].}

Replaces each match of Regex in the string with subproc, which should never be or return nil.

If subproc is a proc (RegexMatch): string, then it is executed with each match and the return value is the replacement value.

If subproc is a proc (string): string, then it is executed with the full text of the match and the return value is the replacement value.

If subproc is a string, the syntax is as follows:

  • $$ - literal $
  • $123 - capture number 123
  • $1$# - first and second captures
  • $# - first capture

The following syntax is not supported in NRE2:

  • $foo - named capture foo
  • ${foo} - same as above
  • $0 - full match

If a given capture is missing, ValueError is thrown.

Source   Edit  
proc replace(str: string; pattern: Regex; subproc: proc (match: string): string): string {.
    ...raises: [Exception], tags: [RootEffect], forbids: [].}
Source   Edit  
proc split(str: string; pattern: Regex; maxSplit = -1; start = 0): seq[string] {.
    ...raises: [], tags: [RootEffect], forbids: [].}

Splits the string with the given regex. This works according to the rules that Perl and JavaScript use.

start behaves the same as in find(...).

Example:

# -  If the match is zero-width, then the string is still split:
assert "123".split(re"") == @["1", "2", "3"]

# -  If the pattern has a capture in it, it is added after the string
#    split:
assert "12".split(re"(\d)") == @["", "1", "", "2", ""]

# -  If `maxsplit != -1`, then the string will only be split
#    `maxsplit - 1` times. This means that there will be `maxsplit`
#    strings in the output seq.
assert "1.2.3".split(re"\.", maxsplit = 2) == @["1", "2.3"]
Source   Edit  
func toSeq(match: CaptureBounds; default = none(HSlice[int, int])): seq[
    Option[HSlice[int, int]]] {....raises: [], tags: [], forbids: [].}
Source   Edit  
func toSeq(match: Captures; default: Option[string] = none(string)): seq[
    Option[string]] {....raises: [], tags: [], forbids: [].}
Source   Edit  
func toTable(match: CaptureBounds): Table[string, HSlice[int, int]] {.
    ...raises: [], tags: [], forbids: [].}
Source   Edit  
func toTable(match: Captures): Table[string, string] {....raises: [], tags: [],
    forbids: [].}
Source   Edit  

Iterators

iterator findIter(str: string; pattern: Regex; start = 0; endpos = int.high): RegexMatch {.
    ...raises: [], tags: [RootEffect], forbids: [].}
Works the same as find(...), but finds every non-overlapping match:

Example:

import std/sugar
assert collect(for a in "2222".findIter(re"22"): a.match) == @["22", "22"]
 # not @["22", "22", "22"]

Arguments are the same as find(...).

Variants:

  • proc findAll(...) returns a seq[string]
Source   Edit  
iterator items(match: CaptureBounds; default = none(HSlice[int, int])): Option[
    HSlice[int, int]] {....raises: [], tags: [], forbids: [].}
Source   Edit  
iterator items(match: Captures; default = none(string)): Option[string] {.
    ...raises: [], tags: [], forbids: [].}
Source   Edit