/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 et sw=2 tw=80: */
/*
** 2012-11-13
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** The code in this file implements a compact but reasonably
** efficient regular-expression matcher for posix extended regular
** expressions against UTF8 text.
**
** This file is an SQLite extension. It registers a single function
** named "regexp(A,B)" where A is the regular expression and B is the
** string to be matched. By registering this function, SQLite will also
** then implement the "B regexp A" operator. Note that with the function
** the regular expression comes first, but with the operator it comes
** second.
**
** The following regular expression syntax is supported:
**
** X* zero or more occurrences of X
** X+ one or more occurrences of X
** X? zero or one occurrences of X
** X{p,q} between p and q occurrences of X
** (X) match X
** X|Y X or Y
** ^X X occurring at the beginning of the string
** X$ X occurring at the end of the string
** . Match any single character
** \c Character c where c is one of \{}()[]|*+?.
** \c C-language escapes for c in afnrtv. ex: \t or \n
** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX
** \xXX Where XX is exactly 2 hex digits, unicode value XX
** [abc] Any single character from the set abc
** [^abc] Any single character not in the set abc
** [a-z] Any single character in the range a-z
** [^a-z] Any single character not in the range a-z
** \b Word boundary
** \w Word character. [A-Za-z0-9_]
** \W Non-word character
** \d Digit
** \D Non-digit
** \s Whitespace character
** \S Non-whitespace character
**
** A nondeterministic finite automaton (NFA) is used for matching, so the
** performance is bounded by O(N*M) where N is the size of the regular
** expression and M is the size of the input string. The matcher never
** exhibits exponential behavior. Note that the X{p,q} operator expands
** to p copies of X followed by q-p copies of X? and that the size of the
** regular expression in the O(N*M) performance bound is computed after
** this expansion.
*/
#if !defined NET_FOSSIL_SCM_REGEXP_H_INCLUDED
#define NET_FOSSIL_SCM_REGEXP_H_INCLUDED
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration: this header only refers to the SQLite API routine
** table through a pointer, so the full definition is not needed here. */
struct sqlite3_api_routines;
/* This file is derived from sqlite3 regexp.c; the engine is exposed here
** so that it can also be used directly. */
/* Handle type for a compiled regular expression. The structure itself is
** not defined in this header; the functions declared here only pass it
** around by pointer. */
typedef struct sqlite3_ReCompiled sqlite3_ReCompiled;
/*
** Compile a textual regular expression in zIn[] into a compiled regular
** expression suitable for use by sqlite3re_match() and return a pointer to the
** compiled regular expression in *ppRe. Return NULL on success or an
** error message if something goes wrong.
**
** Parameters:
**   ppRe    OUT: receives the pointer to the compiled regular expression.
**   zIn     Text of the regular expression to compile.
**   noCase  NOTE(review): presumably non-zero requests case-insensitive
**           matching; the implementation is not visible from this header,
**           so confirm before relying on it.
**
** A compiled expression should be released with sqlite3re_free().
** NOTE(review): who owns the returned error message, and what *ppRe holds
** when an error is returned, cannot be determined from this header --
** confirm against the implementation.
*/
const char* sqlite3re_compile ( sqlite3_ReCompiled** ppRe, const char* zIn, int noCase );
/* Free and reclaim all the memory used by a previously compiled
** regular expression. Applications should invoke this routine once
** for every call to sqlite3re_compile() to avoid memory leaks.
**
** Parameters:
**   pRe  Compiled regular expression obtained through sqlite3re_compile().
**        NOTE(review): whether a NULL pRe is accepted cannot be
**        determined from this header -- confirm.
*/
void sqlite3re_free ( sqlite3_ReCompiled* pRe );
/* Run a compiled regular expression on the input string zIn[].
** Return true on a match and false if there is no match.
**
** Parameters:
**   pRe  Compiled regular expression produced by sqlite3re_compile().
**   zIn  Input text to be matched.
**   nIn  NOTE(review): presumably the length of zIn[] in bytes. The
**        original description of this routine calls zIn[] zero-terminated
**        even though a length is passed as well; how the two interact
**        (e.g. whether a negative nIn means "use the terminator") is not
**        visible from this header -- confirm against the implementation.
*/
int sqlite3re_match ( sqlite3_ReCompiled* pRe, const unsigned char* zIn, int nIn );
/*
** Register the REGEXP extension function with a sqlite3 database instance.
**
** Parameters:
**   db        Database connection to register the function with.
**   pzErrMsg  NOTE(review): presumably receives an error message on
**             failure, as in SQLite's loadable-extension entry point
**             convention -- confirm against the implementation.
**   pApi      Pointer to the SQLite API routine table.
**
** Returns: NOTE(review): presumably an SQLite result code -- confirm.
**
** The connection type is spelled "struct sqlite3" and forward-declared
** right here so that this header compiles even when sqlite3.h has not been
** included beforehand. sqlite3.h defines "sqlite3" as a typedef for
** "struct sqlite3", so this is the same type and existing callers are
** unaffected.
*/
struct sqlite3;
int sqlite3_regexp_init ( struct sqlite3* db, char** pzErrMsg,
const struct sqlite3_api_routines* pApi );
#ifdef __cplusplus
}
#endif
#else
#endif /*#ifndef NET_FOSSIL_SCM_REGEXP_H_INCLUDED*/