Index: src/search.c ================================================================== --- src/search.c +++ src/search.c @@ -1512,11 +1512,14 @@ fossil_print("%s\n",blob_str(&out)); blob_reset(&in); blob_reset(&out); } -/* The schema for the full-text index +/* +** The schema for the full-text index. The %s part must be an empty +** string or a comma followed by additional flags for the FTS virtual +** table. */ static const char zFtsSchema[] = @ -- One entry for each possible search result @ CREATE TABLE IF NOT EXISTS repository.ftsdocs( @ rowid INTEGER PRIMARY KEY, -- Maps to the ftsidx.rowid @@ -1542,18 +1545,100 @@ static const char zFtsDrop[] = @ DROP TABLE IF EXISTS repository.ftsidx; @ DROP VIEW IF EXISTS repository.ftscontent; @ DROP TABLE IF EXISTS repository.ftsdocs; ; + +#if INTERFACE +/* +** Values for the search-tokenizer config option. +*/ +#define FTS5TOK_NONE 0 /* no FTS stemmer */ +#define FTS5TOK_PORTER 1 /* porter stemmer */ +#define FTS5TOK_TRIGRAM 3 /* trigram tokenizer */ +#endif + +/* +** Cached FTS5TOK_xyz value for search_tokenizer_type() and +** friends. +*/ +static int iFtsTokenizer = -1; + +/* +** Returns one of the FTS5TOK_xyz values, depending on the value of +** the search-tokenizer config entry, defaulting to FTS5TOK_NONE. The +** result of the first call is cached for subsequent calls unless +** bRecheck is true. +*/ +int search_tokenizer_type(int bRecheck){ + char *z; + if( iFtsTokenizer>=0 && bRecheck==0 ){ + return iFtsTokenizer; + } + z = db_get("search-tokenizer",0); + if( 0==z ){ + iFtsTokenizer = FTS5TOK_NONE; + }else if(0==fossil_strcmp(z,"porter")){ + iFtsTokenizer = FTS5TOK_PORTER; + }else if(0==fossil_strcmp(z,"trigram")){ + iFtsTokenizer = FTS5TOK_TRIGRAM; + }else{ + iFtsTokenizer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE; + } + fossil_free(z); + return iFtsTokenizer; +} + +/* +** Returns a string value suitable for use as the search-tokenizer +** setting's value, depending on the value of z.
If z is 0 then the +** current search-tokenizer value is used as the basis for formulating +** the result (which may differ from the current value but will have +** the same meaning). Any unknown/unsupported value is interpreted as +** "off". +*/ +const char *search_tokenizer_for_string(const char *z){ + char * zTmp = 0; + const char *zRc = 0; + + if( 0==z ){ + z = zTmp = db_get("search-tokenizer",0); + } + if( 0==z ){ + zRc = "off"; + }else if( 0==fossil_strcmp(z,"porter") ){ + zRc = "porter"; + }else if( 0==fossil_strcmp(z,"trigram") ){ + zRc = "trigram"; + }else{ + zRc = is_truth(z) ? "porter" : "off"; + } + fossil_free(zTmp); + return zRc; +} + +/* +** Sets the search-tokenizer config setting to the value of +** search_tokenizer_for_string(zName). +*/ +void search_set_tokenizer(const char *zName){ + db_set("search-tokenizer", search_tokenizer_for_string( zName ), 0); + iFtsTokenizer = -1; +} /* ** Create or drop the tables associated with a full-text index. */ static int searchIdxExists = -1; void search_create_index(void){ - int useStemmer = db_get_boolean("search-stemmer",0); - const char *zExtra = useStemmer ? ",tokenize=porter" : ""; + const int useTokenizer = search_tokenizer_type(0); + const char *zExtra; + switch(useTokenizer){ + case FTS5TOK_PORTER: zExtra = ",tokenize=porter"; break; + case FTS5TOK_TRIGRAM: zExtra = ",tokenize=trigram"; break; + default: zExtra = ""; break; + } search_sql_setup(g.db); db_multi_exec(zFtsSchema/*works-like:"%s"*/, zExtra/*safe-for-%s*/); searchIdxExists = 1; } void search_drop_index(void){ @@ -1894,12 +1979,14 @@ ** enable cdtwe Enable various kinds of search. c=Check-ins, ** d=Documents, t=Tickets, w=Wiki, e=Tech Notes. ** ** disable cdtwe Disable various kinds of search ** -** stemmer (on|off) Turn the Porter stemmer on or off for indexed -** search. (Unindexed search is never stemmed.) +** tokenizer VALUE Select a tokenizer for indexed search. 
VALUE +** may be one of (porter, on, off, trigram), and +** "on" is equivalent to "porter". Unindexed +** search never uses tokenization or stemming. ** ** The current search settings are displayed after any changes are applied. ** Run this command with no arguments to simply see the settings. */ void fts_config_cmd(void){ @@ -1909,11 +1996,11 @@ } aCmd[] = { { 1, "reindex" }, { 2, "index" }, { 3, "disable" }, { 4, "enable" }, - { 5, "stemmer" }, + { 5, "tokenizer"}, }; static const struct { const char *zSetting; const char *zName; const char *zSw; @@ -1966,16 +2053,23 @@ for(j=0; j=1 ){ search_drop_index(); } @@ -1986,12 +2080,12 @@ /* Always show the status before ending */ for(i=0; i[Older]

} style_finish_page(); } + +/* +** Renders a selection list of values for the search-tokenizer +** setting, using the form field name "ftstok". +*/ +static void select_fts_tokenizer(void){ + const char *const aTokenizer[] = { + "off", "None", + "porter", "Porter Stemmer", + "trigram", "Trigram" + }; + multiple_choice_attribute("FTS Tokenizer", "search-tokenizer", + "ftstok", "off", 3, aTokenizer); +} + /* ** WEBPAGE: srchsetup ** ** Configure the search engine. Requires Admin privilege. */ @@ -2063,28 +2078,30 @@ @

@
if( P("fts0") ){ search_drop_index(); }else if( P("fts1") ){ + const char *zTokenizer = PD("ftstok","off"); + search_set_tokenizer(zTokenizer); search_drop_index(); search_create_index(); search_fill_index(); search_update_index(search_restrict(SRCH_ALL)); } if( search_index_exists() ){ @

Currently using an SQLite FTS%d(search_index_type(0)) search index. @ The index helps search run faster, especially on large repositories, @ but takes up space.

- onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); + select_fts_tokenizer(); @

@ style_submenu_element("FTS Index Debugging","%R/test-ftsdocs"); }else{ @

The SQLite search index is disabled. All searching will be @ a full-text scan. This usually works fine, but can be slow for @ larger repositories.

- onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0); + select_fts_tokenizer(); @

} @ style_finish_page(); } Index: www/changes.wiki ================================================================== --- www/changes.wiki +++ www/changes.wiki @@ -4,10 +4,12 @@ * The stock OCI container no longer includes BusyBox, thus no longer needs to start as root to chroot that power away. That in turn frees us from needing to build and install the container as root, since it no longer has to create a private /dev tree inside the jail for Fossil's use. + * Add support for the trigram tokenizer for FTS5 search to enable + searching in Chinese.

Changes for version 2.21 (2023-02-25)

* Users can request a password reset. This feature is disabled by default. Use the new [/help?cmd=self-pw-reset|self-pw-reset property] to enable it. New web pages [/help?cmd=/resetpw|/resetpw] and @@ -64,17 +66,17 @@ numeric order even if they contain a different number of digits. (Example: "fossil_80_..." comes before "fossil_100.png" in the [/dir?ci=92fd091703a28c07&name=skins/blitz|/skins/blitz] directory listing.) * Enhancements to the graph layout algorithm design to improve readability and promote better situational awareness. - * Performance enhancement for the + * Performance enhancement for the [./checkin_names.wiki#root|"root:BRANCHNAME" style of tag], accomplished using a Common Table Expression in the underlying SQL. * Sort tag listings (command line and webpage) by taking numbers into consideration so as to cater for tags that follow semantic versioning. * On the wiki listings, omit by default wiki pages that are associated with - check-ins and branches. + check-ins and branches. * Add the new "[/help?cmd=describe|fossil describe]" command. * Markdown subsystem extended with [../src/markdown.md#ftnts|footnotes support]. See corresponding [../test/markdown-test3.md|test cases], [/wiki?name=branch/markdown-footnotes#il|known limitations] and [forum:/forumthread/ee1f1597e46ec07a|discussion]. @@ -123,11 +125,11 @@ * Promote the test-detach command into the [/help?cmd=detach|detach command]. * For "[/help?cmd=pull|fossil pull]" with the --from-parent-project option, if no URL is specified then use the last URL from the most recent prior "fossil pull --from-parent-project". - * Add options --project-name and --project-desc to the + * Add options --project-name and --project-desc to the "[/help?cmd=init|fossil init]" command. * The [/help?cmd=/ext|/ext page] generates the SERVER_SOFTWARE environment variable for clients. * Fix the REQUEST_URI [/doc/trunk/www/aboutcgi.wiki#cgivar|CGI variable] such that it includes the query string.
This is how most other systems understand @@ -145,20 +147,20 @@
  • Performance improvements * The --branchcolor option on [/help?cmd=commit|fossil commit] and [/help?cmd=amend|fossil amend] can now take the value "auto" to force Fossil to use its built-in automatic color choosing algorithm. - * Fossil now [./concepts.wiki#workflow|autosyncs] prior to running + * Fossil now [./concepts.wiki#workflow|autosyncs] prior to running [/help?cmd=open|fossil open]. * Add the [/help?cmd=ticket-default-report|ticket-default-report setting], which if set to the title of a ticket report causes that ticket report to be displayed below the search box in the /ticket page. * The "nc" query parameter to the [/help?cmd=/timeline|/timeline] page causes all graph coloring to be omitted. * Improvements and bug fixes to the new "[/help?cmd=ui|fossil ui REMOTE]" feature so that it works better on a wider variety of platforms. - * In [/help?cmd=/wikiedit|/wikiedit], show the list of attachments for + * In [/help?cmd=/wikiedit|/wikiedit], show the list of attachments for the current page and list URLs suitable for pasting them into the page. * Add the --no-http-compression option to [/help?cmd=sync|fossil sync] and similar. * Print total payload bytes on a [/help?cmd=sync|fossil sync] when using the --verbose option. @@ -249,11 +251,11 @@ * The [./defcsp.md|default CSP] has been relaxed slightly to allow images to be loaded from any URL. All other resources are still locked down by default. * The built-in skins all use the "[/help?cmd=mainmenu|mainmenu]" setting to determine the content of the main menu. - The ability to edit the + The ability to edit the "mainmenu" setting is added on the /Admin/Configuration page. * The hamburger menu is now available on most of the built-in skins. * Any built-in skin named "X" can be used instead of the standard repository skin by adding the URL parameter skin=X to the request. The selection is persisted using the display @@ -264,43 +266,43 @@ /sitemap, so that it appears in hamburger menus. 
* The [/sitemap] extensions are now specified by a single new "[/help?cmd=sitemap-extra|sitemap-extra setting]", rather than a cluster of various "sitemap-*" settings. The older settings are no longer used. - This change might require minor server configuration + This change might require minor server configuration adjustments on servers that use /sitemap extensions. The /Admin/Configuration page provides the ability to edit the new "sitemap-extra" setting. - * Added the "--ckout-alias NAME" option to + * Added the "--ckout-alias NAME" option to [/help?cmd=ui|fossil ui], [/help?cmd=server|fossil server], and [/help?cmd=http|fossil http]. This option causes Fossil to understand URIs of the form "/doc/NAME/..." as if they were "[/help?cmd=/doc|/doc/ckout/...]", to facilitate testing of [./embeddeddoc.wiki|embedded documentation] changes prior to check-in. * For diff web pages, if the diff type (unified versus side-by-side) - is not specified by a query parameter, and if the + is not specified by a query parameter, and if the "[/help?cmd=preferred-diff-type|preferred-diff-type]" setting is omitted or less than 1, then select the diff type based on a guess of whether or not the request is coming from a mobile device. Mobile gets unified and desktop gets side-by-side. * The various pages which show diffs now have toggles to show/hide individual diffs. * Add the "[/help?cmd=preferred-diff-type|preferred-diff-type]" setting to allow an admin to force a default diff type. - * The "pikchr-background" settings is now available in + * The "pikchr-background" setting is now available in "detail.txt" skin files, for better control of Pikchr colors in inverted color schemes. - * Add the --list option to the + * Add the --list option to the [/help?cmd=tarball|tarball], [/help?cmd=zip|zip], and [/help?cmd=sqlar|sqlar] commands.
* The javascript used to implement the hamburger menu on the default built-in skin has been made generic so that it is usable by a variety of skins, and promoted to an ordinary built-in javascript file. - * New TH1 commands: + * New TH1 commands: "[/doc/trunk/www/th1.md#bireqjs|builtin_request_js]", "[/doc/trunk/www/th1.md#capexpr|capexpr]", "foreach", "lappend", and "string match" * The [/help/leaves|leaves command] now shows the branch point of each leaf. @@ -333,11 +335,11 @@ * Schema Update Notice #1: This release drops a trigger from the database schema (replacing it with a TEMP trigger that is created as needed). This change happens automatically the first time you add content to a repository using Fossil 2.14 or later. No - action is needed on your part. However, if you upgrade to + action is needed on your part. However, if you upgrade to version 2.14 and then later downgrade or otherwise use an earlier version of Fossil, the email notification mechanism may fail to send out notifications for some events, due to the missing trigger. If you want to permanently downgrade an installation, then you should run @@ -358,11 +360,11 @@ from the remote URL and the newly cloned repo is opened. This makes the clone command work more like Git, thus making it easier for people transitioning from Git. * Added the --mainbranch option to the [/help?cmd=git|fossil git export] command. - * Added the --format option to the + * Added the --format option to the "[/help?cmd=timeline|fossil timeline]" command. * Enhance the --numstat option on the "[/help?cmd=diff|fossil diff]" command so that it shows a total number of lines added and deleted and total number of files modified. @@ -480,11 +482,11 @@ of the checkin that occurred on 2020-06-27 15:06 going back to the 2.11 release. * Update the built-in SQLite so that the "[/help?cmd=sql|fossil sql]" command supports new output modes ".mode box" and ".mode json".
- * Add the "obscure()" SQL function to the + * Add the "obscure()" SQL function to the "[/help?cmd=sql|fossil sql]" command. * Added virtual tables "helptext" and "builtin" to the "[/help?cmd=sql|fossil sql]" command, providing access to the dispatch table including all help text, and the builtin data files, respectively. @@ -517,11 +519,11 @@ to work without the rebuild, but the new backlinks will be missing. * The algorithm for finding the [./tech_overview.wiki#configloc|location of the configuration database] is enhanced to be XDG-compliant. * Add a hide/show feature to - [./wikitheory.wiki#assocwiki|associated wiki] display on + [./wikitheory.wiki#assocwiki|associated wiki] display on check-in and branch information pages. * Enhance the "[/help?cmd=info|fossil info]" command so that it works with no arguments even if not within an open check-out. * Many improvements to the forum and especially email notification of forum posts, in response to community feedback after switching