Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Enhance the fuzzer virtual table to support multiple rule sets. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
a82938731b21d6166d7d482994cb065c |
User & Date: | drh 2012-02-14 15:34:50.192 |
Context
2012-02-14
| ||
18:56 | Increase the maximum ruleset id in the fuzzer from 50 to 2^31-1. (check-in: 760e009adc user: drh tags: trunk) | |
15:34 | Enhance the fuzzer virtual table to support multiple rule sets. (check-in: a82938731b user: drh tags: trunk) | |
2012-02-13
| ||
21:24 | Merge the non-blocking ROLLBACK changes into trunk. (check-in: 9c572d424a user: drh tags: trunk) | |
Changes
Changes to src/test_fuzzer.c.
︙ | ︙ | |||
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | ** WHERE f.word MATCH $prefix ** AND f.distance<=200 ** AND vocabulary.w BETWEEN f.word AND (f.word || x'F7BFBFBF') ** LIMIT 50 ** ** This last query will show up to 50 words out of the vocabulary that ** match or nearly match the $prefix. */ #include "sqlite3.h" #include <stdlib.h> #include <string.h> #include <assert.h> #include <stdio.h> #ifndef SQLITE_OMIT_VIRTUALTABLE /* ** Forward declaration of objects used by this implementation */ typedef struct fuzzer_vtab fuzzer_vtab; typedef struct fuzzer_cursor fuzzer_cursor; typedef struct fuzzer_rule fuzzer_rule; typedef struct fuzzer_seen fuzzer_seen; typedef struct fuzzer_stem fuzzer_stem; /* | > > > > > > > > > > > > > > > > > > > > > > > > > > > | < > > > > > > > > > > > > > | > | | | | < | > | | | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 | ** WHERE f.word MATCH $prefix ** AND f.distance<=200 ** AND vocabulary.w BETWEEN f.word AND (f.word || x'F7BFBFBF') ** LIMIT 50 ** ** This last query will show up to 50 words out of the vocabulary that ** match or nearly match the $prefix. ** ** MULTIPLE RULE SETS ** ** An enhancement as of 2012-02-14 allows multiple rule sets to coexist in ** the same fuzzer. This allows, for example, the fuzzer to operate in ** multiple languages. ** ** A new column "ruleset" is added to the table. This column must have a ** value between 0 and 49. The default value for the ruleset is 0. But ** alternative values can be specified. For example: ** ** INSERT INTO f(ruleset,cFrom,cTo,Cost) VALUES(1,'qu','k',100); ** ** Only one ruleset will be used at a time. When running a MATCH query, ** specify the desired ruleset using a "ruleset=N" term in the WHERE clause. ** For example: ** ** SELECT vocabulary.w FROM f, vocabulary ** WHERE f.word MATCH $word ** AND f.distance<=200 ** AND f.word=vocabulary.w ** AND f.ruleset=1 -- Specify the ruleset to use here ** LIMIT 20 ** ** If no ruleset is specified in the WHERE clause, ruleset 0 is used. */ #include "sqlite3.h" #include <stdlib.h> #include <string.h> #include <assert.h> #include <stdio.h> #ifndef SQLITE_OMIT_VIRTUALTABLE /* ** Forward declaration of objects used by this implementation */ typedef struct fuzzer_vtab fuzzer_vtab; typedef struct fuzzer_cursor fuzzer_cursor; typedef struct fuzzer_rule fuzzer_rule; typedef struct fuzzer_seen fuzzer_seen; typedef struct fuzzer_stem fuzzer_stem; /* ** Various types. ** ** fuzzer_cost is the "cost" of an edit operation. ** ** fuzzer_len is the length of a matching string. ** ** fuzzer_ruleid is an ruleset identifier. */ typedef int fuzzer_cost; typedef signed char fuzzer_len; typedef unsigned char fuzzer_ruleid; /* ** Limits */ #define FUZZER_MX_LENGTH 50 /* Maximum length of a search string */ #define FUZZER_MX_RULEID 50 /* Maximum rule ID */ #define FUZZER_MX_COST 1000 /* Maximum single-rule cost */ /* ** Each transformation rule is stored as an instance of this object. ** All rules are kept on a linked list sorted by rCost. */ struct fuzzer_rule { fuzzer_rule *pNext; /* Next rule in order of increasing rCost */ char *zFrom; /* Transform from */ fuzzer_cost rCost; /* Cost of this transformation */ fuzzer_len nFrom, nTo; /* Length of the zFrom and zTo strings */ fuzzer_ruleid iRuleset; /* The rule set to which this rule belongs */ char zTo[4]; /* Transform to (extra space appended) */ }; /* ** A stem object is used to generate variants. It is also used to record ** previously generated outputs. ** ** Every stem is added to a hash table as it is output. Generation of ** duplicate stems is suppressed. ** ** Active stems (those that might generate new outputs) are kepts on a linked ** list sorted by increasing cost. The cost is the sum of rBaseCost and ** pRule->rCost. */ struct fuzzer_stem { char *zBasis; /* Word being fuzzed */ const fuzzer_rule *pRule; /* Current rule to apply */ fuzzer_stem *pNext; /* Next stem in rCost order */ fuzzer_stem *pHash; /* Next stem with same hash on zBasis */ fuzzer_cost rBaseCost; /* Base cost of getting to zBasis */ fuzzer_cost rCostX; /* Precomputed rBaseCost + pRule->rCost */ fuzzer_len nBasis; /* Length of the zBasis string */ fuzzer_len n; /* Apply pRule at this character offset */ }; /* ** A fuzzer virtual-table object */ struct fuzzer_vtab { sqlite3_vtab base; /* Base class - must be first */ |
︙ | ︙ | |||
175 176 177 178 179 180 181 182 183 184 185 186 187 188 | fuzzer_stem *pStem; /* Stem with smallest rCostX */ fuzzer_stem *pDone; /* Stems already processed to completion */ fuzzer_stem *aQueue[FUZZER_NQUEUE]; /* Queue of stems with higher rCostX */ int mxQueue; /* Largest used index in aQueue[] */ char *zBuf; /* Temporary use buffer */ int nBuf; /* Bytes allocated for zBuf */ int nStem; /* Number of stems allocated */ fuzzer_rule nullRule; /* Null rule used first */ fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */ }; /* Methods for the fuzzer module */ static int fuzzerConnect( sqlite3 *db, | > | 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 | fuzzer_stem *pStem; /* Stem with smallest rCostX */ fuzzer_stem *pDone; /* Stems already processed to completion */ fuzzer_stem *aQueue[FUZZER_NQUEUE]; /* Queue of stems with higher rCostX */ int mxQueue; /* Largest used index in aQueue[] */ char *zBuf; /* Temporary use buffer */ int nBuf; /* Bytes allocated for zBuf */ int nStem; /* Number of stems allocated */ int iRuleset; /* Only process rules from this ruleset */ fuzzer_rule nullRule; /* Null rule used first */ fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */ }; /* Methods for the fuzzer module */ static int fuzzerConnect( sqlite3 *db, |
︙ | ︙ | |||
198 199 200 201 202 203 204 | return SQLITE_ERROR; } n = strlen(argv[0]) + 1; pNew = sqlite3_malloc( sizeof(*pNew) + n ); if( pNew==0 ) return SQLITE_NOMEM; pNew->zClassName = (char*)&pNew[1]; memcpy(pNew->zClassName, argv[0], n); | | > | 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | return SQLITE_ERROR; } n = strlen(argv[0]) + 1; pNew = sqlite3_malloc( sizeof(*pNew) + n ); if( pNew==0 ) return SQLITE_NOMEM; pNew->zClassName = (char*)&pNew[1]; memcpy(pNew->zClassName, argv[0], n); sqlite3_declare_vtab(db, "CREATE TABLE x(word,distance,ruleset,cFrom,cTo,cost)"); memset(pNew, 0, sizeof(*pNew)); *ppVtab = &pNew->base; return SQLITE_OK; } /* Note that for this virtual table, the xCreate and xConnect ** methods are identical. */ |
︙ | ︙ | |||
420 421 422 423 424 425 426 | fuzzer_stem *pLookup; if( fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){ return -1; } h = fuzzerHash(pCur->zBuf); pLookup = pCur->apHash[h]; | | | 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 | fuzzer_stem *pLookup; if( fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){ return -1; } h = fuzzerHash(pCur->zBuf); pLookup = pCur->apHash[h]; while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){ pLookup = pLookup->pHash; } return pLookup!=0; } /* ** Advance a fuzzer_stem to its next value. Return 0 if there are |
︙ | ︙ | |||
449 450 451 452 453 454 455 | if( rc==0 ){ fuzzerCost(pStem); return 1; } } } pStem->n = -1; | > > > | | | 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 | if( rc==0 ){ fuzzerCost(pStem); return 1; } } } pStem->n = -1; do{ pRule = pRule->pNext; }while( pRule && pRule->iRuleset!=pCur->iRuleset ); pStem->pRule = pRule; if( pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0; } return 0; } /* ** The two input stem lists are both sorted in order of increasing ** rCostX. Merge them together into a single list, sorted by rCostX, and |
︙ | ︙ | |||
663 664 665 666 667 668 669 670 671 672 | sqlite3_vtab_cursor *pVtabCursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv ){ fuzzer_cursor *pCur = (fuzzer_cursor *)pVtabCursor; const char *zWord = 0; fuzzer_stem *pStem; fuzzerClearCursor(pCur, 1); pCur->rLimit = 2147483647; | > > | > > | | > > | < | > | 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 | sqlite3_vtab_cursor *pVtabCursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv ){ fuzzer_cursor *pCur = (fuzzer_cursor *)pVtabCursor; const char *zWord = 0; fuzzer_stem *pStem; int idx; fuzzerClearCursor(pCur, 1); pCur->rLimit = 2147483647; idx = 0; if( idxNum & 1 ){ zWord = (const char*)sqlite3_value_text(argv[0]); idx++; } if( idxNum & 2 ){ pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[idx]); idx++; } if( idxNum & 4 ){ pCur->iRuleset = (fuzzer_cost)sqlite3_value_int(argv[idx]); idx++; } if( zWord==0 ) zWord = ""; pCur->pStem = pStem = fuzzerNewStem(pCur, zWord, (fuzzer_cost)0); if( pStem==0 ) return SQLITE_NOMEM; pCur->nullRule.pNext = pCur->pVtab->pRule; pCur->nullRule.rCost = 0; pCur->nullRule.nFrom = 0; |
︙ | ︙ | |||
731 732 733 734 735 736 737 | fuzzer_cursor *pCur = (fuzzer_cursor*)cur; return pCur->rLimit<=(fuzzer_cost)0; } /* ** Search for terms of these forms: ** | | | | > | | > > > | | > | | > > | 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 | fuzzer_cursor *pCur = (fuzzer_cursor*)cur; return pCur->rLimit<=(fuzzer_cost)0; } /* ** Search for terms of these forms: ** ** (A) word MATCH $str ** (B1) distance < $value ** (B2) distance <= $value ** (C) ruleid == $ruleid ** ** The distance< and distance<= are both treated as distance<=. ** The query plan number is a bit vector: ** ** bit 1: Term of the form (A) found ** bit 2: Term like (B1) or (B2) found ** bit 3: Term like (C) found ** ** If bit-1 is set, $str is always in filter.argv[0]. If bit-2 is set ** then $value is in filter.argv[0] if bit-1 is clear and is in ** filter.argv[1] if bit-1 is set. If bit-3 is set, then $ruleid is ** in filter.argv[0] if bit-1 and bit-2 are both zero, is in ** filter.argv[1] if exactly one of bit-1 and bit-2 are set, and is in ** filter.argv[2] if both bit-1 and bit-2 are set. */ static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ int iPlan = 0; int iDistTerm = -1; int iRulesetTerm = -1; int i; const struct sqlite3_index_constraint *pConstraint; pConstraint = pIdxInfo->aConstraint; for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){ if( pConstraint->usable==0 ) continue; if( (iPlan & 1)==0 && pConstraint->iColumn==0 |
︙ | ︙ | |||
768 769 770 771 772 773 774 | && pConstraint->iColumn==1 && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE) ){ iPlan |= 2; iDistTerm = i; } | > > > > > > > | > | | > | > > > | | 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 | && pConstraint->iColumn==1 && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE) ){ iPlan |= 2; iDistTerm = i; } if( (iPlan & 4)==0 && pConstraint->iColumn==2 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){ iPlan |= 4; pIdxInfo->aConstraintUsage[i].omit = 1; iRulesetTerm = i; } } if( iPlan & 2 ){ pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1+((iPlan&1)!=0); } if( iPlan & 4 ){ int idx = 1; if( iPlan & 1 ) idx++; if( iPlan & 2 ) idx++; pIdxInfo->aConstraintUsage[iRulesetTerm].argvIndex = idx; } pIdxInfo->idxNum = iPlan; if( pIdxInfo->nOrderBy==1 && pIdxInfo->aOrderBy[0].iColumn==1 && pIdxInfo->aOrderBy[0].desc==0 ){ pIdxInfo->orderByConsumed = 1; |
︙ | ︙ | |||
807 808 809 810 811 812 813 | fuzzer_vtab *p = (fuzzer_vtab*)pVTab; fuzzer_rule *pRule; const char *zFrom; int nFrom; const char *zTo; int nTo; fuzzer_cost rCost; | > | | | | | | > > > > > > > > > > > > > > > | 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 | fuzzer_vtab *p = (fuzzer_vtab*)pVTab; fuzzer_rule *pRule; const char *zFrom; int nFrom; const char *zTo; int nTo; fuzzer_cost rCost; int rulesetId; if( argc!=8 ){ sqlite3_free(pVTab->zErrMsg); pVTab->zErrMsg = sqlite3_mprintf("cannot delete from a %s virtual table", p->zClassName); return SQLITE_CONSTRAINT; } if( sqlite3_value_type(argv[0])!=SQLITE_NULL ){ sqlite3_free(pVTab->zErrMsg); pVTab->zErrMsg = sqlite3_mprintf("cannot update a %s virtual table", p->zClassName); return SQLITE_CONSTRAINT; } zFrom = (char*)sqlite3_value_text(argv[5]); if( zFrom==0 ) zFrom = ""; zTo = (char*)sqlite3_value_text(argv[6]); if( zTo==0 ) zTo = ""; if( strcmp(zFrom,zTo)==0 ){ /* Silently ignore null transformations */ return SQLITE_OK; } rCost = sqlite3_value_int(argv[7]); if( rCost<=0 || rCost>FUZZER_MX_COST ){ sqlite3_free(pVTab->zErrMsg); pVTab->zErrMsg = sqlite3_mprintf("cost must be between 1 and %d", FUZZER_MX_COST); return SQLITE_CONSTRAINT; } nFrom = strlen(zFrom); nTo = strlen(zTo); if( nFrom>FUZZER_MX_LENGTH || nTo>FUZZER_MX_LENGTH ){ sqlite3_free(pVTab->zErrMsg); pVTab->zErrMsg = sqlite3_mprintf("maximum string length is %d", FUZZER_MX_LENGTH); return SQLITE_CONSTRAINT; } rulesetId = sqlite3_value_int(argv[4]); if( rulesetId<0 || rulesetId>FUZZER_MX_RULEID ){ sqlite3_free(pVTab->zErrMsg); pVTab->zErrMsg = sqlite3_mprintf("rulesetid must be between 0 and %d", FUZZER_MX_RULEID); return SQLITE_CONSTRAINT; } pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo ); if( pRule==0 ){ return SQLITE_NOMEM; } pRule->zFrom = &pRule->zTo[nTo+1]; pRule->nFrom = nFrom; memcpy(pRule->zFrom, zFrom, nFrom+1); memcpy(pRule->zTo, zTo, nTo+1); pRule->nTo = nTo; pRule->rCost = rCost; pRule->pNext = p->pNewRule; pRule->iRuleset = rulesetId; p->pNewRule = pRule; return SQLITE_OK; } /* ** A virtual table module that provides read-only access to a ** Tcl global variable namespace. |
︙ | ︙ |
Changes to test/fuzzer1.test.
︙ | ︙ | |||
38 39 40 41 42 43 44 45 46 47 48 49 50 51 | } {} do_test fuzzer1-1.3 { db eval { SELECT word, distance FROM f1 WHERE word MATCH 'abcde' } } {abcde 0 abcda 1 ebcde 10 ebcda 11 abcdo 100 ebcdo 110 obcde 110 obcda 111 obcdo 210} do_test fuzzer1-2.0 { execsql { CREATE VIRTUAL TABLE temp.f2 USING fuzzer; -- costs based on English letter frequencies INSERT INTO f2(cFrom,cTo,cost) VALUES('a','e',24); INSERT INTO f2(cFrom,cTo,cost) VALUES('a','o',47); | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | } {} do_test fuzzer1-1.3 { db eval { SELECT word, distance FROM f1 WHERE word MATCH 'abcde' } } {abcde 0 abcda 1 ebcde 10 ebcda 11 abcdo 100 ebcdo 110 obcde 110 obcda 111 obcdo 210} do_test fuzzer1-1.4 { db eval { INSERT INTO f1(ruleset, cfrom, cto, cost) VALUES(1,'b','x',1); INSERT INTO f1(ruleset, cfrom, cto, cost) VALUES(1,'d','y',10); INSERT INTO f1(ruleset, cfrom, cto, cost) VALUES(1,'y','z',100); } } {} do_test fuzzer1-1.5 { db eval { SELECT word, distance FROM f1 WHERE word MATCH 'abcde' } } {abcde 0 abcda 1 ebcde 10 ebcda 11 abcdo 100 ebcdo 110 obcde 110 obcda 111 obcdo 210} do_test fuzzer1-1.6 { db eval { SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND ruleset=0 } } {abcde 0 abcda 1 ebcde 10 ebcda 11 abcdo 100 ebcdo 110 obcde 110 obcda 111 obcdo 210} do_test fuzzer1-1.7 { db eval { SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND ruleset=1 } } {abcde 0 axcde 1 axcda 2 abcye 10 abcya 11 axcye 11 axcya 12 abcze 110 abcza 111 axcze 111 axcza 112} do_test fuzzer1-1.8 { db eval { SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND distance<100 } } {abcde 0 abcda 1 ebcde 10 ebcda 11} do_test fuzzer1-1.9 { db eval { SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND distance<=100 } } {abcde 0 abcda 1 ebcde 10 ebcda 11 abcdo 100} do_test fuzzer1-1.10 { db eval { SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND distance<100 AND ruleset=0 } } {abcde 0 abcda 1 ebcde 10 ebcda 11} do_test fuzzer1-1.11 { db eval { SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND distance<=100 AND ruleset=0 } } {abcde 0 abcda 1 ebcde 10 ebcda 11 abcdo 100} do_test fuzzer1-1.12 { db eval { SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND distance<12 AND ruleset=1 } } {abcde 0 axcde 1 axcda 2 abcye 10 abcya 11 axcye 11} do_test fuzzer1-1.13 { db eval { SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND distance<=12 AND ruleset=1 } } {abcde 0 axcde 1 axcda 2 abcye 10 abcya 11 axcye 11 axcya 12} do_test fuzzer1-2.0 { execsql { CREATE VIRTUAL TABLE temp.f2 USING fuzzer; -- costs based on English letter frequencies INSERT INTO f2(cFrom,cTo,cost) VALUES('a','e',24); INSERT INTO f2(cFrom,cTo,cost) VALUES('a','o',47); |
︙ | ︙ |