Index: src/db.c ================================================================== --- src/db.c +++ src/db.c @@ -1802,10 +1802,11 @@ if( g.fSqlTrace ) sqlite3_trace_v2(db, SQLITE_TRACE_PROFILE, db_sql_trace, 0); db_add_aux_functions(db); re_add_sql_func(db); /* The REGEXP operator */ foci_register(db); /* The "files_of_checkin" virtual table */ sqlite3_set_authorizer(db, db_top_authorizer, db); + db_register_fts5(db) /* in search.c */; return db; } /* Index: src/search.c ================================================================== --- src/search.c +++ src/search.c @@ -921,29 +921,38 @@ unsigned int srchFlags /* What to search over */ ){ Blob sql; char *zPat = mprintf("%s",zPattern); int i; + static const char *zSnippetCall; if( srchFlags==0 ) return; sqlite3_create_function(g.db, "rank", 1, SQLITE_UTF8|SQLITE_INNOCUOUS, 0, search_rank_sqlfunc, 0, 0); for(i=0; zPat[i]; i++){ if( zPat[i]=='-' || zPat[i]=='"' ) zPat[i] = ' '; } blob_init(&sql, 0, 0); + if( search_index_type(0)==4 ){ + /* If this repo is still using the legacy FTS4 search index, then + ** the snippet() function is slightly different */ + zSnippetCall = "snippet(ftsidx,'','',' ... ',-1,35)"; + }else{ + /* This is the common case - Using newer FTS5 search index */ + zSnippetCall = "snippet(ftsidx,-1,'','',' ... ',35)"; + } blob_appendf(&sql, "INSERT INTO x(label,url,score,id,date,snip) " " SELECT ftsdocs.label," " ftsdocs.url," " rank(matchinfo(ftsidx,'pcsx'))," " ftsdocs.type || ftsdocs.rid," " datetime(ftsdocs.mtime)," - " snippet(ftsidx,'','',' ... ',-1,35)" + " %s" " FROM ftsidx CROSS JOIN ftsdocs" " WHERE ftsidx MATCH %Q" - " AND ftsdocs.rowid=ftsidx.docid", - zPat + " AND ftsdocs.rowid=ftsidx.rowid", + zSnippetCall /*safe-for-%s*/, zPat ); fossil_free(zPat); if( srchFlags!=SRCH_ALL ){ const char *zSep = " AND ("; static const struct { unsigned m; char c; } aMask[] = { @@ -1508,11 +1517,11 @@ /* The schema for the full-text index */ static const char zFtsSchema[] = @ -- One entry for each possible search result @ CREATE TABLE IF NOT EXISTS repository.ftsdocs( -@ rowid INTEGER PRIMARY KEY, -- Maps to the ftsidx.docid +@ rowid INTEGER PRIMARY KEY, -- Maps to the ftsidx.rowid @ type CHAR(1), -- Type of document @ rid INTEGER, -- BLOB.RID or TAG.TAGID for the document @ name TEXT, -- Additional document description @ idxed BOOLEAN, -- True if currently in the index @ label TEXT, -- Label to print on search results @@ -1526,11 +1535,11 @@ @ CREATE VIEW IF NOT EXISTS repository.ftscontent AS @ SELECT rowid, type, rid, name, idxed, label, url, mtime, @ title(type,rid,name) AS 'title', body(type,rid,name) AS 'body' @ FROM ftsdocs; @ CREATE VIRTUAL TABLE IF NOT EXISTS repository.ftsidx -@ USING fts4(content="ftscontent", title, body%s); +@ USING fts5(content="ftscontent", title, body%s); ; static const char zFtsDrop[] = @ DROP TABLE IF EXISTS repository.ftsidx; @ DROP VIEW IF EXISTS repository.ftscontent; @ DROP TABLE IF EXISTS repository.ftsdocs; @@ -1551,18 +1560,41 @@ db_multi_exec(zFtsDrop/*works-like:""*/); searchIdxExists = 0; } /* -** Return true if the full-text search index exists +** Return true if the full-text search index exists. See also the +** search_index_type() function. */ int search_index_exists(void){ if( searchIdxExists<0 ){ searchIdxExists = db_table_exists("repository","ftsdocs"); } return searchIdxExists; } + +/* +** Determine which full-text search index is currently being used to +** add searching. Return values: +** +** 0 No search index is available +** 4 FTS3/4 +** 5 FTS5 +** +** Results are cached. Make the argument 1 to reset the cache. See +** also the search_index_exists() routine. +*/ +int search_index_type(int bReset){ + static int idxType = -1; + if( idxType<0 || bReset ){ + idxType = db_int(0, + "SELECT CASE WHEN sql GLOB '*fts4*' THEN 4 ELSE 5 END" + " FROM repository.sqlite_schema WHERE name='ftsidx'" + ); + } + return idxType; +} /* ** Fill the FTSDOCS table with unindexed entries for everything ** in the repository. This uses INSERT OR IGNORE so entries already ** in FTSDOCS are unchanged. @@ -1606,11 +1638,11 @@ char zType[2]; zType[0] = cType; zType[1] = 0; search_sql_setup(g.db); db_multi_exec( - "DELETE FROM ftsidx WHERE docid IN" + "DELETE FROM ftsidx WHERE rowid IN" " (SELECT rowid FROM ftsdocs WHERE type=%Q AND rid=%d AND idxed)", zType, rid ); db_multi_exec( "REPLACE INTO ftsdocs(type,rid,name,idxed)" @@ -1617,11 +1649,11 @@ " VALUES(%Q,%d,%Q,0)", zType, rid, zName ); if( cType=='w' || cType=='e' ){ db_multi_exec( - "DELETE FROM ftsidx WHERE docid IN" + "DELETE FROM ftsidx WHERE rowid IN" " (SELECT rowid FROM ftsdocs WHERE type='%c' AND name=%Q AND idxed)", cType, zName ); db_multi_exec( "DELETE FROM ftsdocs WHERE type='%c' AND name=%Q AND rid!=%d", @@ -1657,11 +1689,11 @@ " WHERE foci.checkinID=%d AND blob.uuid=foci.uuid" " AND %z", ckid, glob_expr("foci.filename", db_get("doc-glob","")) ); db_multi_exec( - "DELETE FROM ftsidx WHERE docid IN" + "DELETE FROM ftsidx WHERE rowid IN" " (SELECT rowid FROM ftsdocs WHERE type='d'" " AND rid NOT IN (SELECT rid FROM current_docs))" ); db_multi_exec( "DELETE FROM ftsdocs WHERE type='d'" @@ -1676,11 +1708,11 @@ " %.17g" " FROM current_docs", zDocBr, rTime ); db_multi_exec( - "INSERT INTO ftsidx(docid,title,body)" + "INSERT INTO ftsidx(rowid,title,body)" " SELECT rowid, label, bx FROM ftsdocs WHERE type='d' AND NOT idxed" ); db_multi_exec( "UPDATE ftsdocs SET" " idxed=1," @@ -1693,11 +1725,11 @@ /* ** Deal with all of the unindexed 'c' terms in FTSDOCS */ static void search_update_checkin_index(void){ db_multi_exec( - "INSERT INTO ftsidx(docid,title,body)" + "INSERT INTO ftsidx(rowid,title,body)" " SELECT rowid, '', body('c',rid,NULL) FROM ftsdocs" " WHERE type='c' AND NOT idxed;" ); db_multi_exec( "UPDATE ftsdocs SET idxed=1, name=NULL," @@ -1716,11 +1748,11 @@ /* ** Deal with all of the unindexed 't' terms in FTSDOCS */ static void search_update_ticket_index(void){ db_multi_exec( - "INSERT INTO ftsidx(docid,title,body)" + "INSERT INTO ftsidx(rowid,title,body)" " SELECT rowid, title('t',rid,NULL), body('t',rid,NULL) FROM ftsdocs" " WHERE type='t' AND NOT idxed;" ); if( db_changes()==0 ) return; db_multi_exec( @@ -1739,11 +1771,11 @@ /* ** Deal with all of the unindexed 'w' terms in FTSDOCS */ static void search_update_wiki_index(void){ db_multi_exec( - "INSERT INTO ftsidx(docid,title,body)" + "INSERT INTO ftsidx(rowid,title,body)" " SELECT rowid, title('w',rid,NULL),body('w',rid,NULL) FROM ftsdocs" " WHERE type='w' AND NOT idxed;" ); if( db_changes()==0 ) return; db_multi_exec( @@ -1761,11 +1793,11 @@ /* ** Deal with all of the unindexed 'f' terms in FTSDOCS */ static void search_update_forum_index(void){ db_multi_exec( - "INSERT INTO ftsidx(docid,title,body)" + "INSERT INTO ftsidx(rowid,title,body)" " SELECT rowid, title('f',rid,NULL),body('f',rid,NULL) FROM ftsdocs" " WHERE type='f' AND NOT idxed;" ); if( db_changes()==0 ) return; db_multi_exec( @@ -1784,11 +1816,11 @@ /* ** Deal with all of the unindexed 'e' terms in FTSDOCS */ static void search_update_technote_index(void){ db_multi_exec( - "INSERT INTO ftsidx(docid,title,body)" + "INSERT INTO ftsidx(rowid,title,body)" " SELECT rowid, title('e',rid,NULL),body('e',rid,NULL) FROM ftsdocs" " WHERE type='e' AND NOT idxed;" ); if( db_changes()==0 ) return; db_multi_exec( @@ -1869,11 +1901,11 @@ ** ** The current search settings are displayed after any changes are applied. ** Run this command with no arguments to simply see the settings. */ void fts_config_cmd(void){ - static const struct { + static const struct { int iCmd; const char *z; } aCmd[] = { { 1, "reindex" }, { 2, "index" }, @@ -1957,11 +1989,11 @@ db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off"); } fossil_print("%-17s %s\n", "Porter stemmer:", db_get_boolean("search-stemmer",0) ? "on" : "off"); if( search_index_exists() ){ - fossil_print("%-17s enabled\n", "full-text index:"); + fossil_print("%-17s FTS%d\n", "full-text index:", search_index_type(1)); fossil_print("%-17s %d\n", "documents:", db_int(0, "SELECT count(*) FROM ftsdocs")); }else{ fossil_print("%-17s disabled\n", "full-text index:"); } @@ -2002,24 +2034,24 @@ const char *zUrl = db_column_text(&q,4); const char *zDocId = db_column_text(&q,0); char *zName; char *z; @ - @
docid:  %d(id) + @
rowid:  %d(id) @
id:%s(zDocId) @
name:%h(db_column_text(&q,1)) @
idxed:%d(db_column_int(&q,2)) @
label:%h(db_column_text(&q,3)) @
url: @ %h(zUrl) @
mtime:%s(db_column_text(&q,5)) - z = db_text(0, "SELECT title FROM ftsidx WHERE docid=%d",id); + z = db_text(0, "SELECT title FROM ftsidx WHERE rowid=%d",id); if( z && z[0] ){ @
title:%h(z) fossil_free(z); } - z = db_text(0, "SELECT body FROM ftsidx WHERE docid=%d",id); + z = db_text(0, "SELECT body FROM ftsidx WHERE rowid=%d",id); if( z && z[0] ){ @
body:%h(z) fossil_free(z); } @
@@ -2103,5 +2135,382 @@ @ %d(cnt3) @ @ style_finish_page(); } + + +/* +** The Fts5MatchinfoCtx bits were all taken verbatim from: +** +** https://sqlite.org/src/finfo?name=ext/fts5/fts5_test_mi.c +*/ + +typedef struct Fts5MatchinfoCtx Fts5MatchinfoCtx; + +#ifndef SQLITE_AMALGAMATION +typedef unsigned int u32; +#endif + +struct Fts5MatchinfoCtx { + int nCol; /* Number of cols in FTS5 table */ + int nPhrase; /* Number of phrases in FTS5 query */ + char *zArg; /* nul-term'd copy of 2nd arg */ + int nRet; /* Number of elements in aRet[] */ + u32 *aRet; /* Array of 32-bit unsigned ints to return */ +}; + + +/* +** Return a pointer to the fts5_api pointer for database connection db. +** If an error occurs, return NULL and leave an error in the database +** handle (accessible using sqlite3_errcode()/errmsg()). +*/ +static int fts5_api_from_db(sqlite3 *db, fts5_api **ppApi){ + sqlite3_stmt *pStmt = 0; + int rc; + + *ppApi = 0; + rc = sqlite3_prepare(db, "SELECT fts5(?1)", -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_pointer(pStmt, 1, (void*)ppApi, "fts5_api_ptr", 0); + (void)sqlite3_step(pStmt); + rc = sqlite3_finalize(pStmt); + } + + return rc; +} + + +/* +** Argument f should be a flag accepted by matchinfo() (a valid character +** in the string passed as the second argument). If it is not, -1 is +** returned. Otherwise, if f is a valid matchinfo flag, the value returned +** is the number of 32-bit integers added to the output array if the +** table has nCol columns and the query nPhrase phrases. +*/ +static int fts5MatchinfoFlagsize(int nCol, int nPhrase, char f){ + int ret = -1; + switch( f ){ + case 'p': ret = 1; break; + case 'c': ret = 1; break; + case 'x': ret = 3 * nCol * nPhrase; break; + case 'y': ret = nCol * nPhrase; break; + case 'b': ret = ((nCol + 31) / 32) * nPhrase; break; + case 'n': ret = 1; break; + case 'a': ret = nCol; break; + case 'l': ret = nCol; break; + case 's': ret = nCol; break; + } + return ret; +} + +static int fts5MatchinfoIter( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + Fts5MatchinfoCtx *p, + int(*x)(const Fts5ExtensionApi*,Fts5Context*,Fts5MatchinfoCtx*,char,u32*) +){ + int i; + int n = 0; + int rc = SQLITE_OK; + char f; + for(i=0; (f = p->zArg[i]); i++){ + rc = x(pApi, pFts, p, f, &p->aRet[n]); + if( rc!=SQLITE_OK ) break; + n += fts5MatchinfoFlagsize(p->nCol, p->nPhrase, f); + } + return rc; +} + +static int fts5MatchinfoXCb( + const Fts5ExtensionApi *pApi, + Fts5Context *pFts, + void *pUserData +){ + Fts5PhraseIter iter; + int iCol, iOff; + u32 *aOut = (u32*)pUserData; + int iPrev = -1; + + for(pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff); + iCol>=0; + pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) + ){ + aOut[iCol*3+1]++; + if( iCol!=iPrev ) aOut[iCol*3 + 2]++; + iPrev = iCol; + } + + return SQLITE_OK; +} + +static int fts5MatchinfoGlobalCb( + const Fts5ExtensionApi *pApi, + Fts5Context *pFts, + Fts5MatchinfoCtx *p, + char f, + u32 *aOut +){ + int rc = SQLITE_OK; + switch( f ){ + case 'p': + aOut[0] = p->nPhrase; + break; + + case 'c': + aOut[0] = p->nCol; + break; + + case 'x': { + int i; + for(i=0; inPhrase && rc==SQLITE_OK; i++){ + void *pPtr = (void*)&aOut[i * p->nCol * 3]; + rc = pApi->xQueryPhrase(pFts, i, pPtr, fts5MatchinfoXCb); + } + break; + } + + case 'n': { + sqlite3_int64 nRow; + rc = pApi->xRowCount(pFts, &nRow); + aOut[0] = (u32)nRow; + break; + } + + case 'a': { + sqlite3_int64 nRow = 0; + rc = pApi->xRowCount(pFts, &nRow); + if( nRow==0 ){ + memset(aOut, 0, sizeof(u32) * p->nCol); + }else{ + int i; + for(i=0; rc==SQLITE_OK && inCol; i++){ + sqlite3_int64 nToken; + rc = pApi->xColumnTotalSize(pFts, i, &nToken); + if( rc==SQLITE_OK){ + aOut[i] = (u32)((2*nToken + nRow) / (2*nRow)); + } + } + } + break; + } + + } + return rc; +} + +static int fts5MatchinfoLocalCb( + const Fts5ExtensionApi *pApi, + Fts5Context *pFts, + Fts5MatchinfoCtx *p, + char f, + u32 *aOut +){ + int i; + int rc = SQLITE_OK; + + switch( f ){ + case 'b': { + int iPhrase; + int nInt = ((p->nCol + 31) / 32) * p->nPhrase; + for(i=0; inPhrase; iPhrase++){ + Fts5PhraseIter iter; + int iCol; + for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol); + iCol>=0; + pApi->xPhraseNextColumn(pFts, &iter, &iCol) + ){ + aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << iCol%32); + } + } + + break; + } + + case 'x': + case 'y': { + int nMul = (f=='x' ? 3 : 1); + int iPhrase; + + for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0; + + for(iPhrase=0; iPhrasenPhrase; iPhrase++){ + Fts5PhraseIter iter; + int iOff, iCol; + for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); + iOff>=0; + pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) + ){ + aOut[nMul * (iCol + iPhrase * p->nCol)]++; + } + } + + break; + } + + case 'l': { + for(i=0; rc==SQLITE_OK && inCol; i++){ + int nToken; + rc = pApi->xColumnSize(pFts, i, &nToken); + aOut[i] = (u32)nToken; + } + break; + } + + case 's': { + int nInst; + + memset(aOut, 0, sizeof(u32) * p->nCol); + + rc = pApi->xInstCount(pFts, &nInst); + for(i=0; rc==SQLITE_OK && ixInst(pFts, i, &iPhrase, &iCol, &iOff); + iNextPhrase = iPhrase+1; + iNextOff = iOff+pApi->xPhraseSize(pFts, 0); + for(j=i+1; rc==SQLITE_OK && jxInst(pFts, j, &ip, &ic, &io); + if( ic!=iCol || io>iNextOff ) break; + if( ip==iNextPhrase && io==iNextOff ){ + nSeq++; + iNextPhrase = ip+1; + iNextOff = io + pApi->xPhraseSize(pFts, ip); + } + } + + if( nSeq>aOut[iCol] ) aOut[iCol] = nSeq; + } + + break; + } + } + return rc; +} + +static Fts5MatchinfoCtx *fts5MatchinfoNew( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + sqlite3_context *pCtx, /* Context for returning error message */ + const char *zArg /* Matchinfo flag string */ +){ + Fts5MatchinfoCtx *p; + int nCol; + int nPhrase; + int i; + int nInt; + sqlite3_int64 nByte; + int rc; + + nCol = pApi->xColumnCount(pFts); + nPhrase = pApi->xPhraseCount(pFts); + + nInt = 0; + for(i=0; zArg[i]; i++){ + int n = fts5MatchinfoFlagsize(nCol, nPhrase, zArg[i]); + if( n<0 ){ + char *zErr = sqlite3_mprintf("unrecognized matchinfo flag: %c", zArg[i]); + sqlite3_result_error(pCtx, zErr, -1); + sqlite3_free(zErr); + return 0; + } + nInt += n; + } + + nByte = sizeof(Fts5MatchinfoCtx) /* The struct itself */ + + sizeof(u32) * nInt /* The p->aRet[] array */ + + (i+1); /* The p->zArg string */ + p = (Fts5MatchinfoCtx*)sqlite3_malloc64(nByte); + if( p==0 ){ + sqlite3_result_error_nomem(pCtx); + return 0; + } + memset(p, 0, nByte); + + p->nCol = nCol; + p->nPhrase = nPhrase; + p->aRet = (u32*)&p[1]; + p->nRet = nInt; + p->zArg = (char*)&p->aRet[nInt]; + memcpy(p->zArg, zArg, i); + + rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoGlobalCb); + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + sqlite3_free(p); + p = 0; + } + + return p; +} + +static void fts5MatchinfoFunc( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + sqlite3_context *pCtx, /* Context for returning result/error */ + int nVal, /* Number of values in apVal[] array */ + sqlite3_value **apVal /* Array of trailing arguments */ +){ + const char *zArg; + Fts5MatchinfoCtx *p; + int rc = SQLITE_OK; + + if( nVal>0 ){ + zArg = (const char*)sqlite3_value_text(apVal[0]); + }else{ + zArg = "pcx"; + } + + p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0); + if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){ + p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg); + if( p==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); + } + } + + if( rc==SQLITE_OK ){ + rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb); + } + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + }else{ + /* No errors has occured, so return a copy of the array of integers. */ + int nByte = p->nRet * sizeof(u32); + sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT); + } +} + +int db_register_fts5(sqlite3 *db){ + int rc; /* Return code */ + fts5_api *pApi; /* FTS5 API functions */ + + /* Extract the FTS5 API pointer from the database handle. The + ** fts5_api_from_db() function above is copied verbatim from the + ** FTS5 documentation. Refer there for details. */ + rc = fts5_api_from_db(db, &pApi); + if( rc!=SQLITE_OK ) return rc; + + /* If fts5_api_from_db() returns NULL, then either FTS5 is not registered + ** with this database handle, or an error (OOM perhaps?) has occurred. + ** + ** Also check that the fts5_api object is version 2 or newer. + */ + if( pApi==0 || pApi->iVersion<2 ){ + return SQLITE_ERROR; + } + + /* Register the implementation of matchinfo() */ + rc = pApi->xCreateFunction(pApi, "matchinfo", 0, fts5MatchinfoFunc, 0); + + return rc; +} Index: src/setup.c ================================================================== --- src/setup.c +++ src/setup.c @@ -2067,18 +2067,19 @@ search_create_index(); search_fill_index(); search_update_index(search_restrict(SRCH_ALL)); } if( search_index_exists() ){ - @

Currently using an SQLite FTS4 search index. This makes search - @ run faster, especially on large repositories, but takes up space.

+ @

Currently using an SQLite FTS%d(search_index_type(0)) search index. + @ The index helps search run faster, especially on large repositories, + @ but takes up space.

onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); @

@ style_submenu_element("FTS Index Debugging","%R/test-ftsdocs"); }else{ - @

The SQLite FTS4 search index is disabled. All searching will be + @

The SQLite search index is disabled. All searching will be @ a full-text scan. This usually works fine, but can be slow for @ larger repositories.

onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); @

} Index: www/changes.wiki ================================================================== --- www/changes.wiki +++ www/changes.wiki @@ -15,10 +15,11 @@ * Passwords for remembered remote repositories are now stored as irreversible hashes rather than obscured clear-text, for improved security. * Writes to the database are disabled by default if the HTTP request does not come from the same origin. This enhancement is for defense in depth. There where no known attacks prior to this enhancement. + * Update search infrastructure from FTS4 to FTS5.

Changes for version 2.20 (2022-11-16)

* Added the [/help?cmd=chat-timeline-user|chat-timeline-user setting]. If it is not an empty string, then any changes that would appear on the timeline are announced in [./chat.md|the chat room].