# rust-ipfs/vendor/regex/testdata/word-boundary-special.toml
# Vladislav Tsarev 3dec7eeb01 Initial commit
# 2024-10-18 11:36:33 +03:00
#
# 688 lines
# 12 KiB
# TOML

# These tests are for the "special" word boundary assertions. That is,
# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty
# assertions for more niche use cases, but hitting those cases without these
# assertions is difficult. For example, \b{start-half} and \b{end-half} are
# used to implement the -w/--word-regexp flag in a grep program.
# Tests for (?-u:\b{start})
#
# Every test in this group sets unicode = false, so only ASCII word bytes
# count as word characters. A start-of-word boundary needs a non-word byte (or
# the start of the haystack) on its left and a word byte on its right.
# Offsets in `matches` are byte offsets: "𝛃" is four bytes long in UTF-8 and
# contains no ASCII word byte, which is why a haystack made only of such
# characters never matches here.

[[test]]
name = "word-start-ascii-010"
regex = '\b{start}'
haystack = "a"
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-ascii-020"
regex = '\b{start}'
haystack = "a "
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-ascii-030"
regex = '\b{start}'
haystack = " a "
matches = [[1, 1]]
unicode = false

# An empty haystack has no word character to start a word with.
[[test]]
name = "word-start-ascii-040"
regex = '\b{start}'
haystack = ""
matches = []
unicode = false

[[test]]
name = "word-start-ascii-050"
regex = '\b{start}'
haystack = "ab"
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-ascii-060"
regex = '\b{start}'
haystack = "𝛃"
matches = []
unicode = false

# NOTE(review): `bounds` presumably restricts the search to bytes 2..3, i.e.
# strictly inside the 4-byte encoding of "𝛃" - confirm against the harness.
[[test]]
name = "word-start-ascii-060-bounds"
regex = '\b{start}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-start-ascii-070"
regex = '\b{start}'
haystack = " 𝛃 "
matches = []
unicode = false

[[test]]
name = "word-start-ascii-080"
regex = '\b{start}'
haystack = "𝛃𐆀"
matches = []
unicode = false

# The ASCII "b" follows the four non-word bytes of "𝛃", so a word starts at 4.
[[test]]
name = "word-start-ascii-090"
regex = '\b{start}'
haystack = "𝛃b"
matches = [[4, 4]]
unicode = false

[[test]]
name = "word-start-ascii-110"
regex = '\b{start}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = false
# Tests for (?-u:\b{end})
#
# Every test in this group sets unicode = false, so only ASCII word bytes
# count as word characters. An end-of-word boundary needs a word byte on its
# left and a non-word byte (or the end of the haystack) on its right.
# Offsets in `matches` are byte offsets; "𝛃" is four bytes long in UTF-8.

[[test]]
name = "word-end-ascii-010"
regex = '\b{end}'
haystack = "a"
matches = [[1, 1]]
unicode = false

[[test]]
name = "word-end-ascii-020"
regex = '\b{end}'
haystack = "a "
matches = [[1, 1]]
unicode = false

[[test]]
name = "word-end-ascii-030"
regex = '\b{end}'
haystack = " a "
matches = [[2, 2]]
unicode = false

# An empty haystack has no word character for a word to end after.
[[test]]
name = "word-end-ascii-040"
regex = '\b{end}'
haystack = ""
matches = []
unicode = false

[[test]]
name = "word-end-ascii-050"
regex = '\b{end}'
haystack = "ab"
matches = [[2, 2]]
unicode = false

[[test]]
name = "word-end-ascii-060"
regex = '\b{end}'
haystack = "𝛃"
matches = []
unicode = false

# NOTE(review): `bounds` presumably restricts the search to bytes 2..3, i.e.
# strictly inside the 4-byte encoding of "𝛃" - confirm against the harness.
[[test]]
name = "word-end-ascii-060-bounds"
regex = '\b{end}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-end-ascii-070"
regex = '\b{end}'
haystack = " 𝛃 "
matches = []
unicode = false

[[test]]
name = "word-end-ascii-080"
regex = '\b{end}'
haystack = "𝛃𐆀"
matches = []
unicode = false

# "b" is the last byte (offset 4), so the word ends at the haystack end, 5.
[[test]]
name = "word-end-ascii-090"
regex = '\b{end}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = false

# The word "b" ends at 1 because the bytes of "𝛃" are not ASCII word bytes.
[[test]]
name = "word-end-ascii-110"
regex = '\b{end}'
haystack = "b𝛃"
matches = [[1, 1]]
unicode = false
# Tests for \b{start}
#
# Every test in this group sets unicode = true. The expectations below treat
# "𝛃" as a word character and "𐆀" as a non-word character. Offsets in
# `matches` are byte offsets; both characters are four bytes long in UTF-8.

[[test]]
name = "word-start-unicode-010"
regex = '\b{start}'
haystack = "a"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-020"
regex = '\b{start}'
haystack = "a "
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-030"
regex = '\b{start}'
haystack = " a "
matches = [[1, 1]]
unicode = true

# An empty haystack has no word character to start a word with.
[[test]]
name = "word-start-unicode-040"
regex = '\b{start}'
haystack = ""
matches = []
unicode = true

[[test]]
name = "word-start-unicode-050"
regex = '\b{start}'
haystack = "ab"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-060"
regex = '\b{start}'
haystack = "𝛃"
matches = [[0, 0]]
unicode = true

# NOTE(review): `bounds` presumably restricts the search to bytes 2..3, i.e.
# strictly inside the 4-byte encoding of "𝛃" - confirm against the harness.
[[test]]
name = "word-start-unicode-060-bounds"
regex = '\b{start}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

[[test]]
name = "word-start-unicode-070"
regex = '\b{start}'
haystack = " 𝛃 "
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-start-unicode-080"
regex = '\b{start}'
haystack = "𝛃𐆀"
matches = [[0, 0]]
unicode = true

# "𝛃b" is a single word in Unicode mode, so the only word start is at 0.
[[test]]
name = "word-start-unicode-090"
regex = '\b{start}'
haystack = "𝛃b"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-110"
regex = '\b{start}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = true
# Tests for \b{end}
#
# Every test in this group sets unicode = true. The expectations below treat
# "𝛃" as a word character and "𐆀" as a non-word character. Offsets in
# `matches` are byte offsets; both characters are four bytes long in UTF-8.

[[test]]
name = "word-end-unicode-010"
regex = '\b{end}'
haystack = "a"
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-end-unicode-020"
regex = '\b{end}'
haystack = "a "
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-end-unicode-030"
regex = '\b{end}'
haystack = " a "
matches = [[2, 2]]
unicode = true

# An empty haystack has no word character for a word to end after.
[[test]]
name = "word-end-unicode-040"
regex = '\b{end}'
haystack = ""
matches = []
unicode = true

[[test]]
name = "word-end-unicode-050"
regex = '\b{end}'
haystack = "ab"
matches = [[2, 2]]
unicode = true

[[test]]
name = "word-end-unicode-060"
regex = '\b{end}'
haystack = "𝛃"
matches = [[4, 4]]
unicode = true

# NOTE(review): `bounds` presumably restricts the search to bytes 2..3, i.e.
# strictly inside the 4-byte encoding of "𝛃" - confirm against the harness.
[[test]]
name = "word-end-unicode-060-bounds"
regex = '\b{end}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

# One space byte plus the four bytes of "𝛃" puts the end of the word at 5.
[[test]]
name = "word-end-unicode-070"
regex = '\b{end}'
haystack = " 𝛃 "
matches = [[5, 5]]
unicode = true

[[test]]
name = "word-end-unicode-080"
regex = '\b{end}'
haystack = "𝛃𐆀"
matches = [[4, 4]]
unicode = true

[[test]]
name = "word-end-unicode-090"
regex = '\b{end}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = true

# "b𝛃" is a single word in Unicode mode, so the only word end is at 5.
[[test]]
name = "word-end-unicode-110"
regex = '\b{end}'
haystack = "b𝛃"
matches = [[5, 5]]
unicode = true
# Tests for (?-u:\b{start-half})
#
# The "half" start assertion only inspects the left-hand side: it matches
# wherever the preceding byte is not an ASCII word byte, or at the start of the
# haystack. Nothing is required on the right, which is why it also matches in
# an empty haystack and at the end of a haystack that ends in a non-word byte.
# Every test in this group sets unicode = false. Offsets are byte offsets.

[[test]]
name = "word-start-half-ascii-010"
regex = '\b{start-half}'
haystack = "a"
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-half-ascii-020"
regex = '\b{start-half}'
haystack = "a "
matches = [[0, 0], [2, 2]]
unicode = false

[[test]]
name = "word-start-half-ascii-030"
regex = '\b{start-half}'
haystack = " a "
matches = [[0, 0], [1, 1], [3, 3]]
unicode = false

[[test]]
name = "word-start-half-ascii-040"
regex = '\b{start-half}'
haystack = ""
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-half-ascii-050"
regex = '\b{start-half}'
haystack = "ab"
matches = [[0, 0]]
unicode = false

# Only offsets 0 and 4 are reported here, i.e. none of the offsets inside the
# 4-byte encoding of "𝛃". Compare with the "-noutf8" variant that follows.
[[test]]
name = "word-start-half-ascii-060"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0], [4, 4]]
unicode = false

# With utf8 = false every byte offset is reported, including 1, 2 and 3, which
# fall inside the encoding of "𝛃".
[[test]]
name = "word-start-half-ascii-060-noutf8"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
unicode = false
utf8 = false

# NOTE(review): `bounds` presumably restricts the search to bytes 2..3, i.e.
# strictly inside the 4-byte encoding of "𝛃" - confirm against the harness.
[[test]]
name = "word-start-half-ascii-060-bounds"
regex = '\b{start-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-start-half-ascii-070"
regex = '\b{start-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
unicode = false

[[test]]
name = "word-start-half-ascii-080"
regex = '\b{start-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [4, 4], [8, 8]]
unicode = false

# No match at 5: the byte before the end of the haystack is the word byte "b".
[[test]]
name = "word-start-half-ascii-090"
regex = '\b{start-half}'
haystack = "𝛃b"
matches = [[0, 0], [4, 4]]
unicode = false

# No match at 1: the byte before that offset is the word byte "b".
[[test]]
name = "word-start-half-ascii-110"
regex = '\b{start-half}'
haystack = "b𝛃"
matches = [[0, 0], [5, 5]]
unicode = false
# Tests for (?-u:\b{end-half})
#
# The "half" end assertion only inspects the right-hand side: it matches
# wherever the following byte is not an ASCII word byte, or at the end of the
# haystack. Nothing is required on the left, which is why it also matches in
# an empty haystack and at the start of a haystack that begins with a non-word
# byte. Every test in this group sets unicode = false. Offsets are byte
# offsets.

[[test]]
name = "word-end-half-ascii-010"
regex = '\b{end-half}'
haystack = "a"
matches = [[1, 1]]
unicode = false

[[test]]
name = "word-end-half-ascii-020"
regex = '\b{end-half}'
haystack = "a "
matches = [[1, 1], [2, 2]]
unicode = false

[[test]]
name = "word-end-half-ascii-030"
regex = '\b{end-half}'
haystack = " a "
matches = [[0, 0], [2, 2], [3, 3]]
unicode = false

[[test]]
name = "word-end-half-ascii-040"
regex = '\b{end-half}'
haystack = ""
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-end-half-ascii-050"
regex = '\b{end-half}'
haystack = "ab"
matches = [[2, 2]]
unicode = false

# Only offsets 0 and 4 are reported, i.e. none of the offsets inside the
# 4-byte encoding of "𝛃".
[[test]]
name = "word-end-half-ascii-060"
regex = '\b{end-half}'
haystack = "𝛃"
matches = [[0, 0], [4, 4]]
unicode = false

# NOTE(review): `bounds` presumably restricts the search to bytes 2..3, i.e.
# strictly inside the 4-byte encoding of "𝛃" - confirm against the harness.
[[test]]
name = "word-end-half-ascii-060-bounds"
regex = '\b{end-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-end-half-ascii-070"
regex = '\b{end-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
unicode = false

[[test]]
name = "word-end-half-ascii-080"
regex = '\b{end-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [4, 4], [8, 8]]
unicode = false

# No match at 4: the byte after that offset is the word byte "b".
[[test]]
name = "word-end-half-ascii-090"
regex = '\b{end-half}'
haystack = "𝛃b"
matches = [[0, 0], [5, 5]]
unicode = false

# No match at 0: the byte after that offset is the word byte "b".
[[test]]
name = "word-end-half-ascii-110"
regex = '\b{end-half}'
haystack = "b𝛃"
matches = [[1, 1], [5, 5]]
unicode = false
# Tests for \b{start-half}
#
# Same left-hand-side-only check as the (?-u:...) variant, but every test in
# this group sets unicode = true. The expectations below treat "𝛃" as a word
# character and "𐆀" as a non-word character. Offsets are byte offsets; both
# characters are four bytes long in UTF-8.

[[test]]
name = "word-start-half-unicode-010"
regex = '\b{start-half}'
haystack = "a"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-half-unicode-020"
regex = '\b{start-half}'
haystack = "a "
matches = [[0, 0], [2, 2]]
unicode = true

[[test]]
name = "word-start-half-unicode-030"
regex = '\b{start-half}'
haystack = " a "
matches = [[0, 0], [1, 1], [3, 3]]
unicode = true

[[test]]
name = "word-start-half-unicode-040"
regex = '\b{start-half}'
haystack = ""
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-half-unicode-050"
regex = '\b{start-half}'
haystack = "ab"
matches = [[0, 0]]
unicode = true

# No match at 4: the character before the end of the haystack is the word
# character "𝛃".
[[test]]
name = "word-start-half-unicode-060"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0]]
unicode = true

# NOTE(review): `bounds` presumably restricts the search to bytes 2..3, i.e.
# strictly inside the 4-byte encoding of "𝛃" - confirm against the harness.
[[test]]
name = "word-start-half-unicode-060-bounds"
regex = '\b{start-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

# No match at 5: that offset is preceded by the word character "𝛃".
[[test]]
name = "word-start-half-unicode-070"
regex = '\b{start-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [6, 6]]
unicode = true

# Offset 8 matches because it is preceded by the non-word character "𐆀".
[[test]]
name = "word-start-half-unicode-080"
regex = '\b{start-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [8, 8]]
unicode = true

[[test]]
name = "word-start-half-unicode-090"
regex = '\b{start-half}'
haystack = "𝛃b"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-half-unicode-110"
regex = '\b{start-half}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = true
# Tests for \b{end-half}
#
# Same right-hand-side-only check as the (?-u:...) variant, but every test in
# this group sets unicode = true. The expectations below treat "𝛃" as a word
# character and "𐆀" as a non-word character. Offsets are byte offsets; both
# characters are four bytes long in UTF-8.

[[test]]
name = "word-end-half-unicode-010"
regex = '\b{end-half}'
haystack = "a"
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-end-half-unicode-020"
regex = '\b{end-half}'
haystack = "a "
matches = [[1, 1], [2, 2]]
unicode = true

[[test]]
name = "word-end-half-unicode-030"
regex = '\b{end-half}'
haystack = " a "
matches = [[0, 0], [2, 2], [3, 3]]
unicode = true

[[test]]
name = "word-end-half-unicode-040"
regex = '\b{end-half}'
haystack = ""
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-end-half-unicode-050"
regex = '\b{end-half}'
haystack = "ab"
matches = [[2, 2]]
unicode = true

# No match at 0: that offset is followed by the word character "𝛃".
[[test]]
name = "word-end-half-unicode-060"
regex = '\b{end-half}'
haystack = "𝛃"
matches = [[4, 4]]
unicode = true

# NOTE(review): `bounds` presumably restricts the search to bytes 2..3, i.e.
# strictly inside the 4-byte encoding of "𝛃" - confirm against the harness.
[[test]]
name = "word-end-half-unicode-060-bounds"
regex = '\b{end-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

# No match at 1: that offset is followed by the word character "𝛃".
[[test]]
name = "word-end-half-unicode-070"
regex = '\b{end-half}'
haystack = " 𝛃 "
matches = [[0, 0], [5, 5], [6, 6]]
unicode = true

# Offset 4 matches because it is followed by the non-word character "𐆀".
[[test]]
name = "word-end-half-unicode-080"
regex = '\b{end-half}'
haystack = "𝛃𐆀"
matches = [[4, 4], [8, 8]]
unicode = true

[[test]]
name = "word-end-half-unicode-090"
regex = '\b{end-half}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = true

[[test]]
name = "word-end-half-unicode-110"
regex = '\b{end-half}'
haystack = "b𝛃"
matches = [[5, 5]]
unicode = true
# Specialty tests.
#
# NOTE(review): these three tests carry "ascii" in their names but, unlike the
# other "ascii" tests in this file, do not set `unicode = false` - confirm
# whether that is intended.

# Since \r is special cased in the start state computation (to deal with CRLF
# mode), this test ensures that the correct start state is computed when the
# pattern starts with a half word boundary assertion.
#
# The haystack is a TOML literal string, so the file holds a backslash followed
# by "r"; presumably `unescape = true` has the harness turn that into a single
# carriage-return byte, which would place "abc" at bytes 4..7 - TODO confirm.
[[test]]
name = "word-start-half-ascii-carriage"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC\rabc'
bounds = [4, 7]
matches = [[4, 7]]
unescape = true

# Since \n is also special cased in the start state computation, this test
# ensures that the correct start state is computed when the pattern starts with
# a half word boundary assertion.
[[test]]
name = "word-start-half-ascii-linefeed"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC\nabc'
bounds = [4, 7]
matches = [[4, 7]]
unescape = true

# Like the carriage return test above, but with a custom line terminator.
# Here the byte just before the search bounds is the configured terminator
# "!" rather than \r or \n.
[[test]]
name = "word-start-half-ascii-customlineterm"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC!abc'
bounds = [4, 7]
matches = [[4, 7]]
unescape = true
line-terminator = '!'