# These tests are for the "special" word boundary assertions. That is,
# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty
# assertions for more niche use cases, but hitting those cases without these
# assertions is difficult. For example, \b{start-half} and \b{end-half} are
# used to implement the -w/--word-regexp flag in a grep program.

# Tests for (?-u:\b{start})
#
# With Unicode mode disabled, only ASCII word characters take part in the
# assertion. The expectations below show \b{start} producing an empty match
# only where an ASCII word begins: haystacks made up solely of 𝛃 and/or 𐆀
# yield no match, while an ASCII letter next to them does. Offsets are byte
# offsets, which is why "𝛃b" reports its match at 4.

[[test]]
name = "word-start-ascii-010"
regex = '\b{start}'
haystack = "a"
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-ascii-020"
regex = '\b{start}'
haystack = "a "
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-ascii-030"
regex = '\b{start}'
haystack = " a "
matches = [[1, 1]]
unicode = false

# An empty haystack contains no word, so there is no word start.
[[test]]
name = "word-start-ascii-040"
regex = '\b{start}'
haystack = ""
matches = []
unicode = false

[[test]]
name = "word-start-ascii-050"
regex = '\b{start}'
haystack = "ab"
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-ascii-060"
regex = '\b{start}'
haystack = "𝛃"
matches = []
unicode = false

# The bounds [2, 3] lie strictly inside the 4-byte UTF-8 encoding of 𝛃; no
# match is expected there.
[[test]]
name = "word-start-ascii-060-bounds"
regex = '\b{start}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-start-ascii-070"
regex = '\b{start}'
haystack = " 𝛃 "
matches = []
unicode = false

[[test]]
name = "word-start-ascii-080"
regex = '\b{start}'
haystack = "𝛃𐆀"
matches = []
unicode = false

# The ASCII word "b" starts right after the 4 bytes of 𝛃.
[[test]]
name = "word-start-ascii-090"
regex = '\b{start}'
haystack = "𝛃b"
matches = [[4, 4]]
unicode = false

[[test]]
name = "word-start-ascii-110"
regex = '\b{start}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = false

# Tests for (?-u:\b{end})
#
# With Unicode mode disabled, only ASCII word characters take part in the
# assertion. The expectations below show \b{end} producing an empty match
# only where an ASCII word ends: haystacks made up solely of 𝛃 and/or 𐆀
# yield no match. Offsets are byte offsets, which is why "𝛃b" reports its
# match at 5 (4 bytes for 𝛃 plus 1 for "b").

[[test]]
name = "word-end-ascii-010"
regex = '\b{end}'
haystack = "a"
matches = [[1, 1]]
unicode = false

[[test]]
name = "word-end-ascii-020"
regex = '\b{end}'
haystack = "a "
matches = [[1, 1]]
unicode = false

[[test]]
name = "word-end-ascii-030"
regex = '\b{end}'
haystack = " a "
matches = [[2, 2]]
unicode = false

# An empty haystack contains no word, so there is no word end.
[[test]]
name = "word-end-ascii-040"
regex = '\b{end}'
haystack = ""
matches = []
unicode = false

[[test]]
name = "word-end-ascii-050"
regex = '\b{end}'
haystack = "ab"
matches = [[2, 2]]
unicode = false

[[test]]
name = "word-end-ascii-060"
regex = '\b{end}'
haystack = "𝛃"
matches = []
unicode = false

# The bounds [2, 3] lie strictly inside the 4-byte UTF-8 encoding of 𝛃; no
# match is expected there.
[[test]]
name = "word-end-ascii-060-bounds"
regex = '\b{end}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-end-ascii-070"
regex = '\b{end}'
haystack = " 𝛃 "
matches = []
unicode = false

[[test]]
name = "word-end-ascii-080"
regex = '\b{end}'
haystack = "𝛃𐆀"
matches = []
unicode = false

[[test]]
name = "word-end-ascii-090"
regex = '\b{end}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = false

# The ASCII word "b" ends at offset 1, immediately before 𝛃.
[[test]]
name = "word-end-ascii-110"
regex = '\b{end}'
haystack = "b𝛃"
matches = [[1, 1]]
unicode = false

# Tests for \b{start}
#
# With Unicode mode enabled, the expectations below treat 𝛃 as a word
# character: a haystack of just 𝛃 has a word start at offset 0, and "𝛃b" and
# "b𝛃" are each a single word starting at 0. Offsets are byte offsets.

[[test]]
name = "word-start-unicode-010"
regex = '\b{start}'
haystack = "a"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-020"
regex = '\b{start}'
haystack = "a "
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-030"
regex = '\b{start}'
haystack = " a "
matches = [[1, 1]]
unicode = true

# An empty haystack contains no word, so there is no word start.
[[test]]
name = "word-start-unicode-040"
regex = '\b{start}'
haystack = ""
matches = []
unicode = true

[[test]]
name = "word-start-unicode-050"
regex = '\b{start}'
haystack = "ab"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-060"
regex = '\b{start}'
haystack = "𝛃"
matches = [[0, 0]]
unicode = true

# The bounds [2, 3] lie strictly inside the 4-byte UTF-8 encoding of 𝛃; no
# match is expected there.
[[test]]
name = "word-start-unicode-060-bounds"
regex = '\b{start}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

[[test]]
name = "word-start-unicode-070"
regex = '\b{start}'
haystack = " 𝛃 "
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-start-unicode-080"
regex = '\b{start}'
haystack = "𝛃𐆀"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-090"
regex = '\b{start}'
haystack = "𝛃b"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-110"
regex = '\b{start}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = true

# Tests for \b{end}
#
# With Unicode mode enabled, the expectations below treat 𝛃 as a word
# character and 𐆀 as a non-word character: "𝛃𐆀" has its word end at offset
# 4, right between the two. Offsets are byte offsets.

[[test]]
name = "word-end-unicode-010"
regex = '\b{end}'
haystack = "a"
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-end-unicode-020"
regex = '\b{end}'
haystack = "a "
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-end-unicode-030"
regex = '\b{end}'
haystack = " a "
matches = [[2, 2]]
unicode = true

# An empty haystack contains no word, so there is no word end.
[[test]]
name = "word-end-unicode-040"
regex = '\b{end}'
haystack = ""
matches = []
unicode = true

[[test]]
name = "word-end-unicode-050"
regex = '\b{end}'
haystack = "ab"
matches = [[2, 2]]
unicode = true

[[test]]
name = "word-end-unicode-060"
regex = '\b{end}'
haystack = "𝛃"
matches = [[4, 4]]
unicode = true

# The bounds [2, 3] lie strictly inside the 4-byte UTF-8 encoding of 𝛃; no
# match is expected there.
[[test]]
name = "word-end-unicode-060-bounds"
regex = '\b{end}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

[[test]]
name = "word-end-unicode-070"
regex = '\b{end}'
haystack = " 𝛃 "
matches = [[5, 5]]
unicode = true

[[test]]
name = "word-end-unicode-080"
regex = '\b{end}'
haystack = "𝛃𐆀"
matches = [[4, 4]]
unicode = true

[[test]]
name = "word-end-unicode-090"
regex = '\b{end}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = true

[[test]]
name = "word-end-unicode-110"
regex = '\b{end}'
haystack = "b𝛃"
matches = [[5, 5]]
unicode = true

# Tests for (?-u:\b{start-half})
#
# The expectations below show the "half" assertion checking only what comes
# before the position: it matches at the start of the haystack and after any
# non-word character, regardless of what follows (so it also matches in an
# empty haystack and at the very end of "a "). With Unicode mode disabled,
# 𝛃 and 𐆀 are not word characters, so positions after them match too.
# Offsets are byte offsets.

[[test]]
name = "word-start-half-ascii-010"
regex = '\b{start-half}'
haystack = "a"
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-half-ascii-020"
regex = '\b{start-half}'
haystack = "a "
matches = [[0, 0], [2, 2]]
unicode = false

[[test]]
name = "word-start-half-ascii-030"
regex = '\b{start-half}'
haystack = " a "
matches = [[0, 0], [1, 1], [3, 3]]
unicode = false

[[test]]
name = "word-start-half-ascii-040"
regex = '\b{start-half}'
haystack = ""
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-half-ascii-050"
regex = '\b{start-half}'
haystack = "ab"
matches = [[0, 0]]
unicode = false

# Only offsets on the boundaries of the 4-byte encoding of 𝛃 are reported.
[[test]]
name = "word-start-half-ascii-060"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0], [4, 4]]
unicode = false

# Same haystack with utf8 = false: empty matches are now expected at every
# byte offset, including those inside the encoding of 𝛃.
[[test]]
name = "word-start-half-ascii-060-noutf8"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
unicode = false
utf8 = false

# The bounds [2, 3] lie strictly inside the 4-byte UTF-8 encoding of 𝛃; no
# match is expected there.
[[test]]
name = "word-start-half-ascii-060-bounds"
regex = '\b{start-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-start-half-ascii-070"
regex = '\b{start-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
unicode = false

[[test]]
name = "word-start-half-ascii-080"
regex = '\b{start-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [4, 4], [8, 8]]
unicode = false

# Offset 5 is absent because it is preceded by the ASCII word character "b".
[[test]]
name = "word-start-half-ascii-090"
regex = '\b{start-half}'
haystack = "𝛃b"
matches = [[0, 0], [4, 4]]
unicode = false

# Offset 1 is absent because it is preceded by the ASCII word character "b".
[[test]]
name = "word-start-half-ascii-110"
regex = '\b{start-half}'
haystack = "b𝛃"
matches = [[0, 0], [5, 5]]
unicode = false

# Tests for (?-u:\b{end-half})
#
# The expectations below show the "half" assertion checking only what comes
# after the position: it matches at the end of the haystack and before any
# non-word character, regardless of what precedes (so it also matches in an
# empty haystack and at the very start of " a "). With Unicode mode
# disabled, 𝛃 and 𐆀 are not word characters, so positions before them match
# too. Offsets are byte offsets.

[[test]]
name = "word-end-half-ascii-010"
regex = '\b{end-half}'
haystack = "a"
matches = [[1, 1]]
unicode = false

[[test]]
name = "word-end-half-ascii-020"
regex = '\b{end-half}'
haystack = "a "
matches = [[1, 1], [2, 2]]
unicode = false

[[test]]
name = "word-end-half-ascii-030"
regex = '\b{end-half}'
haystack = " a "
matches = [[0, 0], [2, 2], [3, 3]]
unicode = false

[[test]]
name = "word-end-half-ascii-040"
regex = '\b{end-half}'
haystack = ""
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-end-half-ascii-050"
regex = '\b{end-half}'
haystack = "ab"
matches = [[2, 2]]
unicode = false

# Only offsets on the boundaries of the 4-byte encoding of 𝛃 are reported.
[[test]]
name = "word-end-half-ascii-060"
regex = '\b{end-half}'
haystack = "𝛃"
matches = [[0, 0], [4, 4]]
unicode = false

# The bounds [2, 3] lie strictly inside the 4-byte UTF-8 encoding of 𝛃; no
# match is expected there.
[[test]]
name = "word-end-half-ascii-060-bounds"
regex = '\b{end-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-end-half-ascii-070"
regex = '\b{end-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
unicode = false

[[test]]
name = "word-end-half-ascii-080"
regex = '\b{end-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [4, 4], [8, 8]]
unicode = false

# Offset 4 is absent because it is followed by the ASCII word character "b".
[[test]]
name = "word-end-half-ascii-090"
regex = '\b{end-half}'
haystack = "𝛃b"
matches = [[0, 0], [5, 5]]
unicode = false

# Offset 0 is absent because it is followed by the ASCII word character "b".
[[test]]
name = "word-end-half-ascii-110"
regex = '\b{end-half}'
haystack = "b𝛃"
matches = [[1, 1], [5, 5]]
unicode = false

# Tests for \b{start-half}
#
# The expectations below show the "half" assertion checking only what comes
# before the position: it matches at the start of the haystack and after any
# non-word character, regardless of what follows. With Unicode mode enabled,
# 𝛃 is treated as a word character (no match right after it) while 𐆀 is not
# (a match is expected right after it). Offsets are byte offsets.

[[test]]
name = "word-start-half-unicode-010"
regex = '\b{start-half}'
haystack = "a"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-half-unicode-020"
regex = '\b{start-half}'
haystack = "a "
matches = [[0, 0], [2, 2]]
unicode = true

[[test]]
name = "word-start-half-unicode-030"
regex = '\b{start-half}'
haystack = " a "
matches = [[0, 0], [1, 1], [3, 3]]
unicode = true

[[test]]
name = "word-start-half-unicode-040"
regex = '\b{start-half}'
haystack = ""
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-half-unicode-050"
regex = '\b{start-half}'
haystack = "ab"
matches = [[0, 0]]
unicode = true

# Offset 4 is absent because it is preceded by the word character 𝛃.
[[test]]
name = "word-start-half-unicode-060"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0]]
unicode = true

# The bounds [2, 3] lie strictly inside the 4-byte UTF-8 encoding of 𝛃; no
# match is expected there.
[[test]]
name = "word-start-half-unicode-060-bounds"
regex = '\b{start-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

# Offset 5 is absent because it is preceded by the word character 𝛃.
[[test]]
name = "word-start-half-unicode-070"
regex = '\b{start-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [6, 6]]
unicode = true

# Offset 8 follows 𐆀, which is not a word character, so it matches.
[[test]]
name = "word-start-half-unicode-080"
regex = '\b{start-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [8, 8]]
unicode = true

[[test]]
name = "word-start-half-unicode-090"
regex = '\b{start-half}'
haystack = "𝛃b"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-half-unicode-110"
regex = '\b{start-half}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = true

# Tests for \b{end-half}
#
# The expectations below show the "half" assertion checking only what comes
# after the position: it matches at the end of the haystack and before any
# non-word character, regardless of what precedes. With Unicode mode
# enabled, 𝛃 is treated as a word character (no match right before it) while
# 𐆀 is not (a match is expected right before it). Offsets are byte offsets.

[[test]]
name = "word-end-half-unicode-010"
regex = '\b{end-half}'
haystack = "a"
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-end-half-unicode-020"
regex = '\b{end-half}'
haystack = "a "
matches = [[1, 1], [2, 2]]
unicode = true

[[test]]
name = "word-end-half-unicode-030"
regex = '\b{end-half}'
haystack = " a "
matches = [[0, 0], [2, 2], [3, 3]]
unicode = true

[[test]]
name = "word-end-half-unicode-040"
regex = '\b{end-half}'
haystack = ""
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-end-half-unicode-050"
regex = '\b{end-half}'
haystack = "ab"
matches = [[2, 2]]
unicode = true

# Offset 0 is absent because it is followed by the word character 𝛃.
[[test]]
name = "word-end-half-unicode-060"
regex = '\b{end-half}'
haystack = "𝛃"
matches = [[4, 4]]
unicode = true

# The bounds [2, 3] lie strictly inside the 4-byte UTF-8 encoding of 𝛃; no
# match is expected there.
[[test]]
name = "word-end-half-unicode-060-bounds"
regex = '\b{end-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

# Offset 1 is absent because it is followed by the word character 𝛃.
[[test]]
name = "word-end-half-unicode-070"
regex = '\b{end-half}'
haystack = " 𝛃 "
matches = [[0, 0], [5, 5], [6, 6]]
unicode = true

# Offset 4 precedes 𐆀, which is not a word character, so it matches.
[[test]]
name = "word-end-half-unicode-080"
regex = '\b{end-half}'
haystack = "𝛃𐆀"
matches = [[4, 4], [8, 8]]
unicode = true

[[test]]
name = "word-end-half-unicode-090"
regex = '\b{end-half}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = true

[[test]]
name = "word-end-half-unicode-110"
regex = '\b{end-half}'
haystack = "b𝛃"
matches = [[5, 5]]
unicode = true

# Specialty tests.

# Since \r is special cased in the start state computation (to deal with CRLF
# mode), this test ensures that the correct start state is computed when the
# pattern starts with a half word boundary assertion.
#
# The search is bounded to [4, 7], so it begins immediately after the \r. The
# haystack is a literal string whose \r escape is expanded because
# unescape = true.
[[test]]
name = "word-start-half-ascii-carriage"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC\rabc'
bounds = [4, 7]
matches = [[4, 7]]
unescape = true

# Since \n is also special cased in the start state computation, this test
# ensures that the correct start state is computed when the pattern starts with
# a half word boundary assertion.
#
# The search is bounded to [4, 7], so it begins immediately after the \n. The
# haystack is a literal string whose \n escape is expanded because
# unescape = true.
[[test]]
name = "word-start-half-ascii-linefeed"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC\nabc'
bounds = [4, 7]
matches = [[4, 7]]
unescape = true

# Like the carriage return test above, but with a custom line terminator.
#
# Here "!" is configured as the line terminator and the search is bounded to
# [4, 7], so it begins immediately after the "!".
[[test]]
name = "word-start-half-ascii-customlineterm"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC!abc'
bounds = [4, 7]
matches = [[4, 7]]
unescape = true
line-terminator = '!'