Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Implementation of "column:" modifiers in FTS1 queries. (CVS 3411) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
820634f71e3a3499994f82b56b784d22 |
User & Date: | drh 2006-09-13 16:02:44.000 |
Context
2006-09-13
| ||
17:17 | Minor code cleanup in FTS1. (CVS 3412) (check-in: fca5928167 user: drh tags: trunk) | |
16:02 | Implementation of "column:" modifiers in FTS1 queries. (CVS 3411) (check-in: 820634f71e user: drh tags: trunk) | |
15:20 | Module spec parser enhancements for FTS1. Now able to cope with column names in the spec that are SQL keywords or have special characters, etc. Also added support for additional control lines. Column names can be followed by a type specifier (which is ignored.) (CVS 3410) (check-in: adb780e0dc user: drh tags: trunk) | |
Changes
Changes to ext/fts1/fts1.c.
︙ | ︙ | |||
1952 1953 1954 1955 1956 1957 1958 | } } /* A single term in a query is represented by an instance of ** the following structure. */ typedef struct QueryTerm { | | > | | | 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 | } } /* A single term in a query is represented by an instance of ** the following structure. */ typedef struct QueryTerm { short int nPhrase; /* How many following terms are part of the same phrase */ short int iColumn; /* Column of the index that must match this term */ signed char isOr; /* this term is preceded by "OR" */ signed char isNot; /* this term is preceded by "-" */ char *pTerm; /* text of the term. '\000' terminated. malloced */ int nTerm; /* Number of bytes in pTerm[] */ } QueryTerm; /* Return a DocList corresponding to the query term *pTerm. If *pTerm ** is the first term of a phrase query, go ahead and evaluate the phrase |
︙ | ︙ | |||
2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 | * A NOT term cannot be the right-hand operand of an OR. If this * occurs in the query string, the NOT is ignored: * * [one OR -two] ==> one OR two * */ typedef struct Query { int nTerms; /* Number of terms in the query */ QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */ int nextIsOr; /* Set the isOr flag on the next inserted term */ } Query; /* Add a new term pTerm[0..nTerm-1] to the query *q. */ static void queryAdd(Query *q, const char *pTerm, int nTerm){ QueryTerm *t; ++q->nTerms; q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); if( q->pTerms==0 ){ q->nTerms = 0; return; } t = &q->pTerms[q->nTerms - 1]; memset(t, 0, sizeof(*t)); t->pTerm = malloc(nTerm+1); memcpy(t->pTerm, pTerm, nTerm); t->pTerm[nTerm] = 0; t->nTerm = nTerm; t->isOr = q->nextIsOr; q->nextIsOr = 0; } /* Free all of the memory that was malloced in order to build *q. */ static void queryDestroy(Query *q){ int i; for(i = 0; i < q->nTerms; ++i){ free(q->pTerms[i].pTerm); } free(q->pTerms); } /* ** Parse the text at pSegment[0..nSegment-1]. Add additional terms ** to the query being assemblied in pQuery. ** ** inPhrase is true if pSegment[0..nSegement-1] is contained within ** double-quotes. If inPhrase is true, then the first term ** is marked with the number of terms in the phrase less one and ** OR and "-" syntax is ignored. If inPhrase is false, then every ** term found is marked with nPhrase=0 and OR and "-" syntax is significant. 
*/ static int tokenizeSegment( sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ const char *pSegment, int nSegment, /* Query expression being parsed */ int inPhrase, /* True if within "..." */ Query *pQuery /* Append results here */ ){ const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; sqlite3_tokenizer_cursor *pCursor; int firstIndex = pQuery->nTerms; int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); if( rc!=SQLITE_OK ) return rc; pCursor->pTokenizer = pTokenizer; while( 1 ){ const char *pToken; int nToken, iBegin, iEnd, iPos; rc = pModule->xNext(pCursor, &pToken, &nToken, &iBegin, &iEnd, &iPos); if( rc!=SQLITE_OK ) break; if( !inPhrase && pQuery->nTerms>0 && nToken==2 && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ pQuery->nextIsOr = 1; continue; } queryAdd(pQuery, pToken, nToken); if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 | * A NOT term cannot be the right-hand operand of an OR. If this * occurs in the query string, the NOT is ignored: * * [one OR -two] ==> one OR two * */ typedef struct Query { fulltext_vtab *pFts; /* The full text index */ int nTerms; /* Number of terms in the query */ QueryTerm *pTerms; /* Array of terms. 
Space obtained from malloc() */ int nextIsOr; /* Set the isOr flag on the next inserted term */ int iColumn; /* Text word parsed must be in this column */ } Query; /* Add a new term pTerm[0..nTerm-1] to the query *q. */ static void queryAdd(Query *q, const char *pTerm, int nTerm){ QueryTerm *t; ++q->nTerms; q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); if( q->pTerms==0 ){ q->nTerms = 0; return; } t = &q->pTerms[q->nTerms - 1]; memset(t, 0, sizeof(*t)); t->pTerm = malloc(nTerm+1); memcpy(t->pTerm, pTerm, nTerm); t->pTerm[nTerm] = 0; t->nTerm = nTerm; t->isOr = q->nextIsOr; q->nextIsOr = 0; t->iColumn = q->iColumn; q->iColumn = -1; } /* Free all of the memory that was malloced in order to build *q. */ static void queryDestroy(Query *q){ int i; for(i = 0; i < q->nTerms; ++i){ free(q->pTerms[i].pTerm); } free(q->pTerms); } /* ** Check to see if the string zToken[0...nToken-1] matches any ** column name in the virtual table. If it does, ** return the zero-indexed column number. If not, return -1. */ static int checkColumnSpecifier( fulltext_vtab *pVtab, /* The virtual table */ const char *zToken, /* Text of the token */ int nToken /* Number of characters in the token */ ){ int i; for(i=0; i<pVtab->nColumn; i++){ if( memcmp(pVtab->azColumn[i], zToken, nToken)==0 && pVtab->azColumn[i][nToken]==0 ){ return i; } } return -1; } /* ** Parse the text at pSegment[0..nSegment-1]. Add additional terms ** to the query being assemblied in pQuery. ** ** inPhrase is true if pSegment[0..nSegement-1] is contained within ** double-quotes. If inPhrase is true, then the first term ** is marked with the number of terms in the phrase less one and ** OR and "-" syntax is ignored. If inPhrase is false, then every ** term found is marked with nPhrase=0 and OR and "-" syntax is significant. 
*/ static int tokenizeSegment( sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ const char *pSegment, int nSegment, /* Query expression being parsed */ int inPhrase, /* True if within "..." */ Query *pQuery /* Append results here */ ){ const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; sqlite3_tokenizer_cursor *pCursor; int firstIndex = pQuery->nTerms; int iCol; int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); if( rc!=SQLITE_OK ) return rc; pCursor->pTokenizer = pTokenizer; while( 1 ){ const char *pToken; int nToken, iBegin, iEnd, iPos; rc = pModule->xNext(pCursor, &pToken, &nToken, &iBegin, &iEnd, &iPos); if( rc!=SQLITE_OK ) break; if( !inPhrase && pSegment[iEnd]==':' && (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ pQuery->iColumn = iCol; continue; } if( !inPhrase && pQuery->nTerms>0 && nToken==2 && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ pQuery->nextIsOr = 1; continue; } queryAdd(pQuery, pToken, nToken); if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ |
︙ | ︙ | |||
2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 | Query *pQuery){ int iInput, inPhrase = 0; if( nInput<0 ) nInput = strlen(pInput); pQuery->nTerms = 0; pQuery->pTerms = NULL; pQuery->nextIsOr = 0; for(iInput=0; iInput<nInput; ++iInput){ int i; for(i=iInput; i<nInput && pInput[i]!='"'; ++i) ; if( i>iInput ){ tokenizeSegment(v->pTokenizer, pInput+iInput, i-iInput, inPhrase, | > > | 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 | Query *pQuery){ int iInput, inPhrase = 0; if( nInput<0 ) nInput = strlen(pInput); pQuery->nTerms = 0; pQuery->pTerms = NULL; pQuery->nextIsOr = 0; pQuery->iColumn = -1; pQuery->pFts = v; for(iInput=0; iInput<nInput; ++iInput){ int i; for(i=iInput; i<nInput && pInput[i]!='"'; ++i) ; if( i>iInput ){ tokenizeSegment(v->pTokenizer, pInput+iInput, i-iInput, inPhrase, |
︙ | ︙ | |||
2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 | } return SQLITE_OK; } /* Perform a full-text query using the search expression in ** pInput[0..nInput-1]. Return a list of matching documents ** in pResult. */ static int fulltextQuery(fulltext_vtab *v, int iColumn, const char *pInput, int nInput, DocList **pResult){ Query q; int i, rc; DocList *pLeft = NULL; DocList *pRight, *pNew; int nNot = 0; rc = parseQuery(v, pInput, nInput, &q); if( rc!=SQLITE_OK ) return rc; /* Merge AND terms. */ for(i = 0 ; i < q.nTerms; i += q.pTerms[i].nPhrase + 1){ if( q.pTerms[i].isNot ){ /* Handle all NOT terms in a separate pass */ nNot++; continue; } | > > > > > > | | 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 | } return SQLITE_OK; } /* Perform a full-text query using the search expression in ** pInput[0..nInput-1]. Return a list of matching documents ** in pResult. ** ** Queries must match column iColumn. Or if iColumn>=nColumn ** they are allowed to match against any column. */ static int fulltextQuery(fulltext_vtab *v, int iColumn, const char *pInput, int nInput, DocList **pResult){ Query q; int i, rc; DocList *pLeft = NULL; DocList *pRight, *pNew; int nNot = 0; int iCol; rc = parseQuery(v, pInput, nInput, &q); if( rc!=SQLITE_OK ) return rc; /* Merge AND terms. */ for(i = 0 ; i < q.nTerms; i += q.pTerms[i].nPhrase + 1){ if( q.pTerms[i].isNot ){ /* Handle all NOT terms in a separate pass */ nNot++; continue; } iCol = q.pTerms[i].iColumn; if( iCol<0 ) iCol = iColumn; rc = docListOfTerm(v, iCol, &q.pTerms[i], &pRight); if( rc ){ queryDestroy(&q); return rc; } if( pLeft==0 ){ pLeft = pRight; }else{ |
︙ | ︙ | |||
2194 2195 2196 2197 2198 2199 2200 | /* We do not yet know how to handle a query of only NOT terms */ return SQLITE_ERROR; } /* Do the EXCEPT terms */ for(i=0; i<q.nTerms; i += q.pTerms[i].nPhrase + 1){ if( !q.pTerms[i].isNot ) continue; | > > | | 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 | /* We do not yet know how to handle a query of only NOT terms */ return SQLITE_ERROR; } /* Do the EXCEPT terms */ for(i=0; i<q.nTerms; i += q.pTerms[i].nPhrase + 1){ if( !q.pTerms[i].isNot ) continue; iCol = q.pTerms[i].iColumn; if( iCol<0 ) iCol = iColumn; rc = docListOfTerm(v, iCol, &q.pTerms[i], &pRight); if( rc ){ queryDestroy(&q); docListDelete(pLeft); return rc; } pNew = docListNew(DL_DOCIDS); docListExceptMerge(pLeft, pRight, pNew); |
︙ | ︙ |
Changes to test/fts1b.test.
1 2 3 4 5 6 7 8 9 10 11 12 13 | # 2006 September 13 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS1 module. # | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | # 2006 September 13 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS1 module. # # $Id: fts1b.test,v 1.3 2006/09/13 16:02:44 drh Exp $ # set testdir [file dirname $argv0] source $testdir/tester.tcl # If SQLITE_ENABLE_FTS1 is not defined, omit this file. ifcapable !fts1 { |
︙ | ︙ | |||
84 85 86 87 88 89 90 91 92 | do_test fts1b-2.1 { execsql { CREATE VIRTUAL TABLE t2 USING fts1(from,to); INSERT INTO t2([from],[to]) VALUES ('one two three', 'four five six'); SELECT [from], [to] FROM t2 } } {{one two three} {four five six}} finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | do_test fts1b-2.1 { execsql { CREATE VIRTUAL TABLE t2 USING fts1(from,to); INSERT INTO t2([from],[to]) VALUES ('one two three', 'four five six'); SELECT [from], [to] FROM t2 } } {{one two three} {four five six}} # Compute an SQL string that contains the words one, two, three,... to # describe bits set in the value $i. Only the lower 5 bits are examined. # proc wordset {i} { set x {} for {set j 0; set k 1} {$j<5} {incr j; incr k $k} { if {$k&$i} {lappend x [lindex {one two three four five} $j]} } return '$x' } # Create a new FTS table with three columns: # # norm: words for the bits of rowid # plusone: words for the bits of rowid+1 # invert: words for the bits of ~rowid # db eval { CREATE VIRTUAL TABLE t4 USING fts1([norm],'plusone',"invert"); } for {set i 1} {$i<=15} {incr i} { set vset [list [wordset $i] [wordset [expr {$i+1}]] [wordset [expr {~$i}]]] db eval "INSERT INTO t4(norm,plusone,invert) VALUES([join $vset ,]);" } do_test fts1b-4.1 { execsql {SELECT rowid FROM t4 WHERE _all MATCH 'norm:one'} } {1 3 5 7 9 11 13 15} do_test fts1b-4.2 { execsql {SELECT rowid FROM t4 WHERE norm MATCH 'one'} } {1 3 5 7 9 11 13 15} do_test fts1b-4.3 { execsql {SELECT rowid FROM t4 WHERE _all MATCH 'one'} } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15} do_test fts1b-4.4 { execsql {SELECT rowid FROM t4 WHERE _all MATCH 'plusone:one'} } {2 4 6 8 10 12 14} do_test fts1b-4.5 { execsql {SELECT 
rowid FROM t4 WHERE plusone MATCH 'one'} } {2 4 6 8 10 12 14} do_test fts1b-4.6 { execsql {SELECT rowid FROM t4 WHERE _all MATCH 'norm:one plusone:two'} } {1 5 9 13} do_test fts1b-4.7 { execsql {SELECT rowid FROM t4 WHERE _all MATCH 'norm:one two'} } {1 3 5 7 9 11 13 15} do_test fts1b-4.8 { execsql {SELECT rowid FROM t4 WHERE _all MATCH 'plusone:two norm:one'} } {1 5 9 13} do_test fts1b-4.9 { execsql {SELECT rowid FROM t4 WHERE _all MATCH 'two norm:one'} } {1 3 5 7 9 11 13 15} finish_test |