Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix the LIKE optimization even when comparing mixed-case BLOBs. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | like-opt-fix |
Files: | files | file ages | folders |
SHA1: |
a58aafdb4e1422b6a8ffc07a67984928 |
User & Date: | drh 2015-03-06 19:47:38.505 |
Context
2015-03-06
| ||
20:49 | Test cases added. Comments fixed. Proposed solution for ticket [05f43be8fdda9fbd9]. (check-in: 6b993bd540 user: drh tags: like-opt-fix) | |
19:47 | Fix the LIKE optimization even when comparing mixed-case BLOBs. (check-in: a58aafdb4e user: drh tags: like-opt-fix) | |
16:45 | The LIKE optimization must be applied twice, once for strings and a second time for BLOBs. Ticket [05f43be8fdda9f]. This check-in is a proof-of-concept of how that might be done. (check-in: 5757e803cb user: drh tags: like-opt-fix) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 | Expr *pLeft; /* LHS of LIKE/GLOB operator */ Expr *pStr2; /* Copy of pStr1 - RHS of LIKE/GLOB operator */ Expr *pNewExpr1; Expr *pNewExpr2; int idxNew1; int idxNew2; Token sCollSeqName; /* Name of collating sequence */ pLeft = pExpr->x.pList->a[1].pExpr; pStr2 = sqlite3ExprDup(db, pStr1, 0); if( !db->mallocFailed ){ u8 c, *pC; /* Last character before the first wildcard */ pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1]; c = *pC; if( noCase ){ /* The point is to increment the last character before the first ** wildcard. But if we increment '@', that will push it into the ** alphabetic range where case conversions will mess up the ** inequality. To avoid this, make sure to also run the full ** LIKE on all candidate expressions by clearing the isComplete flag */ if( c=='A'-1 ) isComplete = 0; c = sqlite3UpperToLower[c]; } *pC = c + 1; } sCollSeqName.z = noCase ? "NOCASE" : "BINARY"; sCollSeqName.n = 6; pNewExpr1 = sqlite3ExprDup(db, pLeft, 0); | > > > > > > > > > > > > > > > > | | < | < | 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 | Expr *pLeft; /* LHS of LIKE/GLOB operator */ Expr *pStr2; /* Copy of pStr1 - RHS of LIKE/GLOB operator */ Expr *pNewExpr1; Expr *pNewExpr2; int idxNew1; int idxNew2; Token sCollSeqName; /* Name of collating sequence */ const u16 wtFlags = TERM_LIKEOPT | TERM_VIRTUAL | TERM_DYNAMIC; pTerm->wtFlags |= TERM_LIKE; pLeft = pExpr->x.pList->a[1].pExpr; pStr2 = sqlite3ExprDup(db, pStr1, 0); /* Convert the lower bound to upper-case and the upper bound to ** lower-case (upper-case is less than lower-case in ASCII) so that ** the range constraints also work for BLOBs */ if( noCase && !pParse->db->mallocFailed ){ int i; char c; for(i=0; (c = pStr1->u.zToken[i])!=0; i++){ pStr1->u.zToken[i] = sqlite3Toupper(c); pStr2->u.zToken[i] = sqlite3Tolower(c); } } if( !db->mallocFailed ){ u8 c, *pC; /* Last character before the first wildcard */ pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1]; c = *pC; if( noCase ){ /* The point is to increment the last character before the first ** wildcard. But if we increment '@', that will push it into the ** alphabetic range where case conversions will mess up the ** inequality. To avoid this, make sure to also run the full ** LIKE on all candidate expressions by clearing the isComplete flag */ if( c=='A'-1 ) isComplete = 0; c = sqlite3UpperToLower[c]; } *pC = c + 1; } sCollSeqName.z = noCase ? "NOCASE" : "BINARY"; sCollSeqName.n = 6; pNewExpr1 = sqlite3ExprDup(db, pLeft, 0); pNewExpr1 = sqlite3PExpr(pParse, TK_GE, sqlite3ExprAddCollateToken(pParse,pNewExpr1,&sCollSeqName), pStr1, 0); transferJoinMarkings(pNewExpr1, pExpr); idxNew1 = whereClauseInsert(pWC, pNewExpr1, wtFlags); testcase( idxNew1==0 ); exprAnalyze(pSrc, pWC, idxNew1); pNewExpr2 = sqlite3ExprDup(db, pLeft, 0); pNewExpr2 = sqlite3PExpr(pParse, TK_LT, sqlite3ExprAddCollateToken(pParse,pNewExpr2,&sCollSeqName), pStr2, 0); transferJoinMarkings(pNewExpr2, pExpr); idxNew2 = whereClauseInsert(pWC, pNewExpr2, wtFlags); testcase( idxNew2==0 ); exprAnalyze(pSrc, pWC, idxNew2); pTerm = &pWC->a[idxTerm]; if( isComplete ){ markTermAsChild(pWC, idxNew1, idxTerm); markTermAsChild(pWC, idxNew2, idxTerm); } |
︙ | ︙ | |||
2471 2472 2473 2474 2475 2476 2477 2478 2479 | ** Disabling a term causes that term to not be tested in the inner loop ** of the join. Disabling is an optimization. When terms are satisfied ** by indices, we disable them to prevent redundant tests in the inner ** loop. We would get the correct results if nothing were ever disabled, ** but joins might run a little slower. The trick is to disable as much ** as we can without disabling too much. If we disabled in (1), we'd get ** the wrong answer. See ticket #813. */ static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){ | > > > > > > > > > > > > > > | > > > | > | | | | < < > | 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 | ** Disabling a term causes that term to not be tested in the inner loop ** of the join. Disabling is an optimization. When terms are satisfied ** by indices, we disable them to prevent redundant tests in the inner ** loop. We would get the correct results if nothing were ever disabled, ** but joins might run a little slower. The trick is to disable as much ** as we can without disabling too much. If we disabled in (1), we'd get ** the wrong answer. See ticket #813. ** ** If all the children of a term are disabled, then that term is also ** automatically disabled. In this way, terms get disabled if derived ** virtual terms are tested first. For example: ** ** x GLOB 'abc*' AND x>='abc' AND x<'acd' ** \___________/ \______/ \_____/ ** parent child1 child2 ** ** Only the parent term was in the original WHERE clause. The child1 ** and child2 terms were added by the LIKE optimization. If both of ** the virtual child terms are valid, then testing of the parent can be ** skipped. */ static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){ int nLoop = 0; while( pTerm && (pTerm->wtFlags & TERM_CODED)==0 && (pLevel->iLeftJoin==0 || ExprHasProperty(pTerm->pExpr, EP_FromJoin)) && (pLevel->notReady & pTerm->prereqAll)==0 ){ if( nLoop && (pTerm->wtFlags & TERM_LIKE)!=0 ){ pTerm->wtFlags |= TERM_LIKECOND; }else{ pTerm->wtFlags |= TERM_CODED; } if( pTerm->iParent<0 ) break; pTerm = &pTerm->pWC->a[pTerm->iParent]; pTerm->nChild--; if( pTerm->nChild!=0 ) break; nLoop++; } } /* ** Code an OP_Affinity opcode to apply the column affinity string zAff ** to the n registers starting at base. ** |
︙ | ︙ | |||
3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 | #endif /* Insert code to test every subexpression that can be completely ** computed using the current set of tables. */ for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ Expr *pE; testcase( pTerm->wtFlags & TERM_VIRTUAL ); testcase( pTerm->wtFlags & TERM_CODED ); if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ testcase( pWInfo->untestedTerms==0 && (pWInfo->wctrlFlags & WHERE_ONETABLE_ONLY)!=0 ); pWInfo->untestedTerms = 1; continue; } pE = pTerm->pExpr; assert( pE!=0 ); if( pLevel->iLeftJoin && !ExprHasProperty(pE, EP_FromJoin) ){ continue; } sqlite3ExprIfFalse(pParse, pE, addrCont, SQLITE_JUMPIFNULL); pTerm->wtFlags |= TERM_CODED; } /* Insert code to test for implied constraints based on transitivity ** of the "==" operator. ** ** Example: If the WHERE clause contains "t1.a=t2.b" and "t2.b=123" | > > > > > > > | 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 | #endif /* Insert code to test every subexpression that can be completely ** computed using the current set of tables. */ for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ Expr *pE; int skipLikeAddr = 0; testcase( pTerm->wtFlags & TERM_VIRTUAL ); testcase( pTerm->wtFlags & TERM_CODED ); if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ testcase( pWInfo->untestedTerms==0 && (pWInfo->wctrlFlags & WHERE_ONETABLE_ONLY)!=0 ); pWInfo->untestedTerms = 1; continue; } pE = pTerm->pExpr; assert( pE!=0 ); if( pLevel->iLeftJoin && !ExprHasProperty(pE, EP_FromJoin) ){ continue; } if( pTerm->wtFlags & TERM_LIKECOND ){ assert( pLevel->iLikeRepCntr>0 ); skipLikeAddr = sqlite3VdbeAddOp1(v, OP_IfZero, pLevel->iLikeRepCntr); VdbeCoverage(v); } sqlite3ExprIfFalse(pParse, pE, addrCont, SQLITE_JUMPIFNULL); if( skipLikeAddr ) sqlite3VdbeJumpHere(v, skipLikeAddr); pTerm->wtFlags |= TERM_CODED; } /* Insert code to test for implied constraints based on transitivity ** of the "==" operator. ** ** Example: If the WHERE clause contains "t1.a=t2.b" and "t2.b=123" |
︙ | ︙ |
Changes to src/whereInt.h.
︙ | ︙ | |||
273 274 275 276 277 278 279 | #define TERM_ANDINFO 0x20 /* Need to free the WhereTerm.u.pAndInfo obj */ #define TERM_OR_OK 0x40 /* Used during OR-clause processing */ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 # define TERM_VNULL 0x80 /* Manufactured x>NULL or x<=NULL term */ #else # define TERM_VNULL 0x00 /* Disabled if not using stat3 */ #endif | | > > | 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 | #define TERM_ANDINFO 0x20 /* Need to free the WhereTerm.u.pAndInfo obj */ #define TERM_OR_OK 0x40 /* Used during OR-clause processing */ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 # define TERM_VNULL 0x80 /* Manufactured x>NULL or x<=NULL term */ #else # define TERM_VNULL 0x00 /* Disabled if not using stat3 */ #endif #define TERM_LIKEOPT 0x100 /* Virtual terms from the LIKE optimization */ #define TERM_LIKECOND 0x200 /* Conditionally this LIKE operator term */ #define TERM_LIKE 0x400 /* The original LIKE operator */ /* ** An instance of the WhereScan object is used as an iterator for locating ** terms in the WHERE clause that are useful to the query planner. */ struct WhereScan { WhereClause *pOrigWC; /* Original, innermost WhereClause */ |
︙ | ︙ |