Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Fix some problems with building fts5 and fts3 together using the amalgamation. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: | fb10bbb9f9c4481e6043d323a3018a4e |
User & Date: | dan 2015-02-02 11:32:20.159 |
Context
2015-02-02
| ||
11:58 | Ensure generated header file fts5parse.h is included in sqlite3.c. (check-in: bc7be2fcfd user: dan tags: fts5) | |
11:32 | Fix some problems with building fts5 and fts3 together using the amalgamation. (check-in: fb10bbb9f9 user: dan tags: fts5) | |
09:40 | Merge latest trunk changes with this branch. (check-in: 76212f2c9a user: dan tags: fts5) | |
Changes
Changes to ext/fts3/unicode/mkunicode.tcl.
︙ | ︙ | |||
113 114 115 116 117 118 119 | puts "** If the argument is a codepoint corresponding to a lowercase letter" puts "** in the ASCII range with a diacritic added, return the codepoint" puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN" puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER" puts "** E\"). The resuls of passing a codepoint that corresponds to an" puts "** uppercase letter are undefined." puts "*/" | | | 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 | puts "** If the argument is a codepoint corresponding to a lowercase letter" puts "** in the ASCII range with a diacritic added, return the codepoint" puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN" puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER" puts "** E\"). The resuls of passing a codepoint that corresponds to an" puts "** uppercase letter are undefined." puts "*/" puts "static int ${::remove_diacritic}(int c)\{" puts " unsigned short aDia\[\] = \{" puts -nonewline " 0, " set i 1 foreach r $lRange { foreach {iCode nRange} $r {} if {($i % 8)==0} {puts "" ; puts -nonewline " " } incr i |
︙ | ︙ | |||
622 623 624 625 626 627 628 | if {[tl_print_table_entry toggle $entry $liOff]} { lappend lHigh $entry } } tl_print_table_footer toggle tl_print_ioff_table $liOff | | | 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 | if {[tl_print_table_entry toggle $entry $liOff]} { lappend lHigh $entry } } tl_print_table_footer toggle tl_print_ioff_table $liOff puts [subst -nocommands { int ret = c; assert( c>=0 ); assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); if( c<128 ){ if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); |
︙ | ︙ | |||
655 656 657 658 659 660 661 | const struct TableEntry *p = &aEntry[iRes]; if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; assert( ret>0 ); } } | | | | 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 | const struct TableEntry *p = &aEntry[iRes]; if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; assert( ret>0 ); } } if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret); } }] foreach entry $lHigh { tl_print_if_entry $entry } puts "" puts " return ret;" |
︙ | ︙ | |||
768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 | puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>" exit 1 } if {[llength $argv]<2} usage set unicodedata.txt [lindex $argv end] set casefolding.txt [lindex $argv end-1] set generate_test_code 0 set generate_fts5_code 0 set function_prefix "sqlite3Fts" for {set i 0} {$i < [llength $argv]-2} {incr i} { switch -- [lindex $argv $i] { -test { set generate_test_code 1 } -fts5 { set function_prefix sqlite3Fts5 set generate_fts5_code 1 } default { usage } } } | > > | 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 | puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>" exit 1 } if {[llength $argv]<2} usage set unicodedata.txt [lindex $argv end] set casefolding.txt [lindex $argv end-1] set remove_diacritic remove_diacritic set generate_test_code 0 set generate_fts5_code 0 set function_prefix "sqlite3Fts" for {set i 0} {$i < [llength $argv]-2} {incr i} { switch -- [lindex $argv $i] { -test { set generate_test_code 1 } -fts5 { set function_prefix sqlite3Fts5 set generate_fts5_code 1 set remove_diacritic fts5_remove_diacritic } default { usage } } } |
︙ | ︙ |
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
12 13 14 15 16 17 18 | ** */ #ifndef _FTS5INT_H #define _FTS5INT_H #include "fts5.h" #include "sqliteInt.h" | < | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 | ** */ #ifndef _FTS5INT_H #define _FTS5INT_H #include "fts5.h" #include "sqliteInt.h" /* ** Maximum number of prefix indexes on single FTS5 table. This must be ** less than 32. If it is set to anything large than that, an #error ** directive in fts5_index.c will cause the build to fail. */ |
︙ | ︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
21 22 23 24 25 26 27 | #define FTS5_EOF 0 typedef struct Fts5ExprTerm Fts5ExprTerm; /* ** Functions generated by lemon from fts5parse.y. */ | | | 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | #define FTS5_EOF 0 typedef struct Fts5ExprTerm Fts5ExprTerm; /* ** Functions generated by lemon from fts5parse.y. */ void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64)); void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); struct Fts5Expr { Fts5Index *pIndex; Fts5ExprNode *pRoot; int bDesc; /* Iterate in descending docid order */ |
︙ | ︙ | |||
175 176 177 178 179 180 181 | } } *pz = &pToken->p[pToken->n]; return tok; } | | | 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 | } } *pz = &pToken->p[pToken->n]; return tok; } static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc((int)t); } static void fts5ParseFree(void *p){ sqlite3_free(p); } int sqlite3Fts5ExprNew( Fts5Config *pConfig, /* FTS5 Configuration */ const char *zExpr, /* Expression text */ Fts5Expr **ppNew, char **pzErr |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
3789 3790 3791 3792 3793 3794 3795 | if( pLeaf ){ if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } } fts5DlidxIterFree(pDlidx); | | | 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 | if( pLeaf ){ if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } } fts5DlidxIterFree(pDlidx); fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf); } } if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ p->rc = FTS5_CORRUPT; } |
︙ | ︙ |
Changes to ext/fts5/fts5_tokenize.c.
︙ | ︙ | |||
557 558 559 560 561 562 563 564 565 566 567 568 569 570 | const char *zSuffix; int nSuffix; int (*xCond)(char *zStem, int nStem); const char *zOutput; int nOutput; }; static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){ int ret = -1; int nBuf = *pnBuf; PorterRule *p; for(p=aRule; p->zSuffix; p++){ assert( strlen(p->zSuffix)==p->nSuffix ); | > | 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 | const char *zSuffix; int nSuffix; int (*xCond)(char *zStem, int nStem); const char *zOutput; int nOutput; }; #if 0 static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){ int ret = -1; int nBuf = *pnBuf; PorterRule *p; for(p=aRule; p->zSuffix; p++){ assert( strlen(p->zSuffix)==p->nSuffix ); |
︙ | ︙ | |||
580 581 582 583 584 585 586 587 588 589 590 591 592 593 | *pnBuf = nStem + p->nOutput; ret = p - aRule; } } return ret; } static int fts5PorterIsVowel(char c, int bYIsVowel){ return ( c=='a' || c=='e' || c=='i' || c=='o' || c=='u' || (bYIsVowel && c=='y') ); } | > | 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 | *pnBuf = nStem + p->nOutput; ret = p - aRule; } } return ret; } #endif static int fts5PorterIsVowel(char c, int bYIsVowel){ return ( c=='a' || c=='e' || c=='i' || c=='o' || c=='u' || (bYIsVowel && c=='y') ); } |
︙ | ︙ |
Changes to ext/fts5/fts5_unicode2.c.
︙ | ︙ | |||
34 35 36 37 38 39 40 | ** The most significant 22 bits in each 32-bit value contain the first ** codepoint in the range. The least significant 10 bits are used to store ** the size of the range (always at least 1). In other words, the value ** ((C<<22) + N) represents a range of N codepoints starting with codepoint ** C. It is not possible to represent a range larger than 1023 codepoints ** using this format. */ | | | 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | ** The most significant 22 bits in each 32-bit value contain the first ** codepoint in the range. The least significant 10 bits are used to store ** the size of the range (always at least 1). In other words, the value ** ((C<<22) + N) represents a range of N codepoints starting with codepoint ** C. It is not possible to represent a range larger than 1023 codepoints ** using this format. */ static const unsigned int aEntry[] = { 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01, 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01, 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802, 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F, |
︙ | ︙ | |||
126 127 128 129 130 131 132 | 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, }; if( c<128 ){ return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; | | | 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, }; if( c<128 ){ return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; int iRes = 0; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; if( key >= aEntry[iTest] ){ iRes = iTest; iLo = iTest+1; |
︙ | ︙ | |||
154 155 156 157 158 159 160 | ** If the argument is a codepoint corresponding to a lowercase letter ** in the ASCII range with a diacritic added, return the codepoint ** of the ASCII letter only. For example, if passed 235 - "LATIN ** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER ** E"). The resuls of passing a codepoint that corresponds to an ** uppercase letter are undefined. */ | | | 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | ** If the argument is a codepoint corresponding to a lowercase letter ** in the ASCII range with a diacritic added, return the codepoint ** of the ASCII letter only. For example, if passed 235 - "LATIN ** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER ** E"). The resuls of passing a codepoint that corresponds to an ** uppercase letter are undefined. */ static int fts5_remove_diacritic(int c){ unsigned short aDia[] = { 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, |
︙ | ︙ | |||
197 198 199 200 201 202 203 | iLo = iTest+1; }else{ iHi = iTest-1; } } assert( key>=aDia[iRes] ); return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); | < > | 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 | iLo = iTest+1; }else{ iHi = iTest-1; } } assert( key>=aDia[iRes] ); return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); } /* ** Return true if the argument interpreted as a unicode codepoint ** is a diacritical modifier character. */ int sqlite3Fts5UnicodeIsdiacritic(int c){ |
︙ | ︙ | |||
347 348 349 350 351 352 353 | const struct TableEntry *p = &aEntry[iRes]; if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; assert( ret>0 ); } } | | | 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 | const struct TableEntry *p = &aEntry[iRes]; if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; assert( ret>0 ); } } if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret); } else if( c>=66560 && c<66600 ){ ret = c + 40; } return ret; } #endif /* defined(SQLITE_ENABLE_FTS5) */ |
Changes to main.mk.
︙ | ︙ | |||
239 240 241 242 243 244 245 | $(TOP)/ext/fts5/fts5.c \ $(TOP)/ext/fts5/fts5_config.c \ $(TOP)/ext/fts5/fts5_expr.c \ $(TOP)/ext/fts5/fts5_hash.c \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c \ $(TOP)/ext/fts5/fts5_storage.c \ | | > | 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | $(TOP)/ext/fts5/fts5.c \ $(TOP)/ext/fts5/fts5_config.c \ $(TOP)/ext/fts5/fts5_expr.c \ $(TOP)/ext/fts5/fts5_hash.c \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c \ $(TOP)/ext/fts5/fts5_storage.c \ $(TOP)/ext/fts5/fts5_tokenize.c \ $(TOP)/ext/fts5/fts5_unicode2.c # Generated source code files # SRC += \ keywordhash.h \ opcodes.c \ |
︙ | ︙ | |||
630 631 632 633 634 635 636 | $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_unicode2.c fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . rm -f fts5parse.h ./lemon $(OPTS) fts5parse.y mv fts5parse.c fts5parse.c.orig | | > | 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 | $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_unicode2.c fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . rm -f fts5parse.h ./lemon $(OPTS) fts5parse.y mv fts5parse.c fts5parse.c.orig cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' \ | sed 's/TOKEN/FTS5TOKEN/g' > fts5parse.c userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c # Rules for building test programs and for running tests |
︙ | ︙ |
Changes to tool/mksqlite3c.tcl.
︙ | ︙ | |||
338 339 340 341 342 343 344 345 346 347 348 349 350 351 | fts5_config.c fts5_expr.c fts5_hash.c fts5_index.c fts5parse.c fts5_storage.c fts5_tokenize.c rtree.c icu.c fts3_icu.c } { copy_file tsrc/$file } | > | 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 | fts5_config.c fts5_expr.c fts5_hash.c fts5_index.c fts5parse.c fts5_storage.c fts5_tokenize.c fts5_unicode2.c rtree.c icu.c fts3_icu.c } { copy_file tsrc/$file } |
︙ | ︙ |