regex
Backtracking regex engine written in pure Jda. No external dependencies.
Supported Syntax
| Pattern | Description |
|---|---|
| `.` | Any character |
| `*` | Zero or more (greedy) |
| `+` | One or more (greedy) |
| `?` | Zero or one |
| `[abc]` | Character class |
| `[a-z]` | Character range |
| `[^abc]` | Negated character class |
| `^` | Start-of-string anchor |
| `$` | End-of-string anchor |
| `\d` | Digit `[0-9]` |
| `\w` | Word character `[a-zA-Z0-9_]` |
| `\s` | Whitespace `[ \t\n\r]` |
| `\.` | Escaped literal dot |
Usage
import regex
fn main() {
// Full match: does the entire string match?
print(regex_match("hello", 5, "hello", 5)) // 1
print(regex_match("hel+o", 5, "hello", 5)) // 1
// Search: find first match position
let pos = regex_search("\\d+", 3, "abc123def", 9)
print(pos) // 3
// Count: non-overlapping matches
let n = regex_count("[a-z]+", 6, "foo bar baz", 11)
print(n) // 3
}
Function Reference
| Function | Signature | Description |
|---|---|---|
| `regex_match` | `(pat: &i8, pl: i64, text: &i8, tl: i64) -> i64` | Full match (entire text) |
| `regex_search` | `(pat: &i8, pl: i64, text: &i8, tl: i64) -> i64` | Find first match position |
| `regex_count` | `(pat: &i8, pl: i64, text: &i8, tl: i64) -> i64` | Count non-overlapping matches |
| `re_match_at` | `(pat: &i8, pl: i64, text: &i8, tp: i64, tl: i64) -> i64` | Match at specific position |
Detailed API
regex_match
fn regex_match(pat: &i8, pl: i64, text: &i8, tl: i64) -> i64
Test whether the pattern matches the entire text string. The match must consume all bytes.
// Exact match
print(regex_match("abc", 3, "abc", 3)) // 1
print(regex_match("abc", 3, "abcd", 4)) // 0
// With quantifiers
print(regex_match("a.*c", 4, "abbc", 4)) // 1
// Character classes
print(regex_match("[0-9]+", 6, "42", 2)) // 1
print(regex_match("[0-9]+", 6, "abc", 3)) // 0
Parameters:
- pat – pattern byte buffer
- pl – pattern length
- text – text byte buffer
- tl – text length
Returns: 1 if the pattern matches the entire text, 0 otherwise.
regex_search
fn regex_search(pat: &i8, pl: i64, text: &i8, tl: i64) -> i64
Search for the first occurrence of the pattern anywhere in the text. If the pattern starts with ^, it only tries matching at position 0.
// Find digits in a string
print(regex_search("\\d+", 3, "item42price", 11)) // 4
// Anchored search
print(regex_search("^abc", 4, "abc123", 6)) // 0
print(regex_search("^abc", 4, "xabc", 4)) // -1
// Not found
print(regex_search("xyz", 3, "hello", 5)) // -1
Returns: Index of the first match position, or -1 if no match found.
regex_count
fn regex_count(pat: &i8, pl: i64, text: &i8, tl: i64) -> i64
Count all non-overlapping matches of the pattern in the text. After each match, scanning advances by the match length (or by 1 if the match is zero-length).
// Count words
print(regex_count("[a-z]+", 6, "one two three", 13)) // 3
// Count digits
print(regex_count("\\d", 2, "a1b2c3", 6)) // 3
// Count specific pattern
print(regex_count("ab", 2, "ababab", 6)) // 3
Returns: Number of non-overlapping matches.
re_match_at
fn re_match_at(pat: &i8, pl: i64, text: &i8, tp: i64, tl: i64) -> i64
Try to match the pattern at a specific position tp in the text. This is the low-level matching function used by regex_match, regex_search, and regex_count.
// Match "abc" starting at position 3
let mlen = re_match_at("abc", 3, "xxxabc", 3, 6)
print(mlen) // 3 (matched 3 bytes)
// No match at this position
let mlen2 = re_match_at("abc", 3, "xxxabc", 0, 6)
print(mlen2) // -1 (no match)
Returns: Number of bytes matched, or -1 if no match at that position.
re_match_escape
fn re_match_escape(pp: i64, ch: i64) -> i64
Internal: Check if character ch matches an escape sequence at pattern position pp. Supports \d (digit), \w (word), and \s (whitespace).
re_match_class
fn re_match_class(pp: i64, ch: i64) -> i64
Internal: Check if character ch matches the character class [...] starting at pattern position pp. Handles ranges [a-z] and negation [^...].
re_skip_class
fn re_skip_class(pp: i64) -> i64
Internal: Skip past a character class [...] in the pattern string. Returns the new pattern position.
re_skip_elem
fn re_skip_elem(pp: i64) -> i64
Internal: Skip one pattern element (literal, dot, or class) and its following quantifier (*, +, ?) if present.
re_char_match
fn re_char_match(pp: i64, ch: i64) -> i64
Internal: Match a single pattern element at pp (literal, dot, or class) against a single text character ch.
re_match_star
fn re_match_star(ep: i64, rp: i64, tp: i64) -> i64
Internal: Greedy matcher for the * quantifier. Matches as many occurrences of the element at ep as possible, then backtracks to match the rest of the pattern at rp.
re_match_plus
fn re_match_plus(ep: i64, rp: i64, tp: i64) -> i64
Internal: Matcher for the + quantifier. Requires at least one match of the element at ep, then behaves like *.
re_match_quest
fn re_match_quest(ep: i64, rp: i64, tp: i64) -> i64
Internal: Matcher for the ? quantifier. Tries matching the element at ep once, and if that fails (or if the rest of the pattern fails), tries matching zero occurrences.
re_match_here
fn re_match_here(pp: i64, tp: i64) -> i64
Internal: The core recursive backtracking matcher. Tries to match the pattern starting at pp against the text starting at tp.
Internal Functions
| Function | Signature | Description |
|---|---|---|
| `re_match_escape` | `(pp: i64, ch: i64) -> i64` | Check `\d`, `\w`, `\s` escapes |
| `re_match_class` | `(pp: i64, ch: i64) -> i64` | Check character class membership |
| `re_skip_class` | `(pp: i64) -> i64` | Skip past `[...]` in pattern |
| `re_skip_elem` | `(pp: i64) -> i64` | Skip one pattern element |
| `re_char_match` | `(pp: i64, ch: i64) -> i64` | Match one pattern element against one char |
| `re_match_star` | `(ep: i64, rp: i64, tp: i64) -> i64` | Greedy `*` quantifier |
| `re_match_plus` | `(ep: i64, rp: i64, tp: i64) -> i64` | `+` quantifier (one or more) |
| `re_match_quest` | `(ep: i64, rp: i64, tp: i64) -> i64` | `?` quantifier (zero or one) |
| `re_match_here` | `(pp: i64, tp: i64) -> i64` | Core recursive matcher |