Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -5748,11 +5748,11 @@ } } pPage = pCur->pPage; idx = ++pCur->ix; - if( !pPage->isInit ){ + if( !pPage->isInit || sqlite3FaultSim(412) ){ /* The only known way for this to happen is for there to be a ** recursive SQL function that does a DELETE operation as part of a ** SELECT which deletes content out from under an active cursor ** in a corrupt database file where the table being DELETE-ed from ** has pages in common with the table being queried. See TH3 Index: src/vdbe.c ================================================================== --- src/vdbe.c +++ src/vdbe.c @@ -4380,10 +4380,147 @@ assert( pOp[1].opcode==OP_IdxLT || pOp[1].opcode==OP_IdxGT ); pOp++; /* Skip the OP_IdxLt or OP_IdxGT that follows */ } break; } + + +/* Opcode: SeekScan P1 * * * * +** Synopsis: Scan-ahead up to P1 rows +** +** This opcode is a prefix opcode to OP_SeekGE. In other words, this +** opcode must be immediately followed by OP_SeekGE. Furthermore, the +** OP_SeekGE must be followed by OP_IdxGT. These constraints are +** checked by assert() statements. +** +** This opcode uses the P1 through P4 operands of the subsequent +** OP_SeekGE. In the text that follows, the operands of the subsequent +** OP_SeekGE opcode are denoted as SeekOP.P1 through SeekOP.P4. Only +** the P1 operand of this opcode is used, and it is denoted as This.P1. +** +** This opcode helps to optimize IN operators on a multi-column index +** where the IN operator is on the later terms of the index by avoiding +** unnecessary seeks on the btree, substituting steps to the next row +** of the b-tree instead. A correct answer is obtained if this opcode +** is omitted or is a no-op. +** +** The SeekGE.P3 and SeekGE.P4 operands identify an unpacked key which +** is the desired entry that we want the cursor SeekGE.P1 to be pointing +** to. Call this SeekGE.P4/P5 row the "target". +** +** If the SeekGE.P1 cursor is not currently pointing to a valid row, +** then this opcode is a no-op and control passes through into the OP_SeekGE. +** +** If the SeekGE.P1 cursor is pointing to a valid row, then that row +** might be the target row, or it might be near and slightly before the +** target row. This opcode attempts to position the cursor on the target +** row by, perhaps stepping by invoking sqlite3BtreeStep() on the cursor +** between 0 and This.P1 times. +** +** There are three possible outcomes from this opcode:
    +** +**
  1. If after This.P1 steps, the cursor is still point to a place that +** is earlier in the btree than the target row, +** then fall through into the subsquence OP_SeekGE opcode. +** +**
  2. If the cursor is successfully moved to the target row by 0 or more +** sqlite3BtreeNext() calls, then jump to the first instruction after the +** OP_IdxGT opcode - or in other words, skip the next two opcodes. +** +**
  3. If the cursor ends up past the target row (indicating the the target +** row does not exist in the btree) then jump to SeekOP.P2. +**
+*/ +case OP_SeekScan: { + VdbeCursor *pC; + int res; + int n; + UnpackedRecord r; + + assert( pOp[1].opcode==OP_SeekGE ); + assert( pOp[2].opcode==OP_IdxGT ); + assert( pOp[1].p1==pOp[2].p1 ); + assert( pOp[1].p2==pOp[2].p2 ); + assert( pOp[1].p3==pOp[2].p3 ); + assert( pOp[1].p4.i==pOp[2].p4.i ); + assert( pOp->p1>0 ); + pC = p->apCsr[pOp[1].p1]; + assert( pC!=0 ); + assert( pC->eCurType==CURTYPE_BTREE ); + assert( !pC->isTable ); + if( !sqlite3BtreeCursorIsValidNN(pC->uc.pCursor) ){ +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... cursor not valid - fall through\n"); + } +#endif + break; + } + n = pOp->p1; + assert( n>=1 ); + r.pKeyInfo = pC->pKeyInfo; + r.nField = (u16)pOp[1].p4.i; + r.default_rc = 0; + r.aMem = &aMem[pOp[1].p3]; +#ifdef SQLITE_DEBUG + { + int i; + for(i=0; i0 ){ + seekscan_search_fail: +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... %d steps and then skip\n", pOp->p1 - n); + } +#endif + VdbeBranchTaken(1,3); + pOp++; + goto jump_to_p2; + } + if( res==0 ){ +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... %d steps and then success\n", pOp->p1 - n); + } +#endif + VdbeBranchTaken(2,3); + pOp += 2; + break; + } + if( n<=0 ){ +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + printf("... fall through after %d steps\n", pOp->p1); + } +#endif + VdbeBranchTaken(0,3); + break; + } + n--; + rc = sqlite3BtreeNext(pC->uc.pCursor, 0); + if( rc ){ + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + goto seekscan_search_fail; + }else{ + goto abort_due_to_error; + } + } + } + + break; +} + /* Opcode: SeekHit P1 P2 P3 * * ** Synopsis: set P2<=seekHit<=P3 ** ** Increase or decrease the seekHit value for cursor P1, if necessary, @@ -5912,22 +6049,45 @@ assert( memIsValid(&r.aMem[i]) ); REGISTER_TRACE(pOp->p3+i, &aMem[pOp->p3+i]); } } #endif - res = 0; /* Not needed. Only used to silence a warning. */ - rc = sqlite3VdbeIdxKeyCompare(db, pC, &r, &res); + + /* Inlined version of sqlite3VdbeIdxKeyCompare() */ + { + i64 nCellKey = 0; + BtCursor *pCur; + Mem m; + + assert( pC->eCurType==CURTYPE_BTREE ); + pCur = pC->uc.pCursor; + assert( sqlite3BtreeCursorIsValid(pCur) ); + nCellKey = sqlite3BtreePayloadSize(pCur); + /* nCellKey will always be between 0 and 0xffffffff because of the way + ** that btreeParseCellPtr() and sqlite3GetVarint32() are implemented */ + if( nCellKey<=0 || nCellKey>0x7fffffff ){ + rc = SQLITE_CORRUPT_BKPT; + goto abort_due_to_error; + } + sqlite3VdbeMemInit(&m, db, 0); + rc = sqlite3VdbeMemFromBtreeZeroOffset(pCur, (u32)nCellKey, &m); + if( rc ) goto abort_due_to_error; + res = sqlite3VdbeRecordCompareWithSkip(m.n, m.z, &r, 0); + sqlite3VdbeMemRelease(&m); + } + /* End of inlined sqlite3VdbeIdxKeyCompare() */ + assert( (OP_IdxLE&1)==(OP_IdxLT&1) && (OP_IdxGE&1)==(OP_IdxGT&1) ); if( (pOp->opcode&1)==(OP_IdxLT&1) ){ assert( pOp->opcode==OP_IdxLE || pOp->opcode==OP_IdxLT ); res = -res; }else{ assert( pOp->opcode==OP_IdxGE || pOp->opcode==OP_IdxGT ); res++; } VdbeBranchTaken(res>0,2); - if( rc ) goto abort_due_to_error; + assert( rc==SQLITE_OK ); if( res>0 ) goto jump_to_p2; break; } /* Opcode: Destroy P1 P2 P3 * * Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -2562,11 +2562,11 @@ safetyMargin = 10; /* TUNING: extra weight for indexed IN */ if( M + logK + safetyMargin < nIn + rLogSize ){ WHERETRACE(0x40, ("Scan preferred over IN operator on column %d of \"%s\" (%d<%d)\n", saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize)); - continue; + pNew->wsFlags |= WHERE_IN_SEEKSCAN; }else{ WHERETRACE(0x40, ("IN operator preferred on column %d of \"%s\" (%d>=%d)\n", saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize)); } @@ -5195,10 +5195,11 @@ sqlite3VdbeAddOp3(v, op, iIndexCur, pIx->tnum, iDb); sqlite3VdbeSetP4KeyInfo(pParse, pIx); if( (pLoop->wsFlags & WHERE_CONSTRAINT)!=0 && (pLoop->wsFlags & (WHERE_COLUMN_RANGE|WHERE_SKIPSCAN))==0 && (pLoop->wsFlags & WHERE_BIGNULL_SORT)==0 + && (pLoop->wsFlags & WHERE_IN_SEEKSCAN)==0 && (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)==0 && pWInfo->eDistinct!=WHERE_DISTINCT_ORDERED ){ sqlite3VdbeChangeP5(v, OPFLAG_SEEKEQ); } @@ -5357,11 +5358,13 @@ sqlite3VdbeResolveLabel(v, pLevel->addrNxt); for(j=pLevel->u.in.nIn, pIn=&pLevel->u.in.aInLoop[j-1]; j>0; j--, pIn--){ sqlite3VdbeJumpHere(v, pIn->addrInTop+1); if( pIn->eEndLoopOp!=OP_Noop ){ if( pIn->nPrefix ){ - assert( pLoop->wsFlags & WHERE_IN_EARLYOUT ); + int bEarlyOut = + (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 + && (pLoop->wsFlags & WHERE_IN_EARLYOUT)!=0; if( pLevel->iLeftJoin ){ /* For LEFT JOIN queries, cursor pIn->iCur may not have been ** opened yet. This occurs for WHERE clauses such as ** "a = ? AND b IN (...)", where the index is on (a, b). If ** the RHS of the (a=?) is NULL, then the "b IN (...)" may @@ -5368,16 +5371,14 @@ ** never have been coded, but the body of the loop run to ** return the null-row. So, if the cursor is not open yet, ** jump over the OP_Next or OP_Prev instruction about to ** be coded. */ sqlite3VdbeAddOp2(v, OP_IfNotOpen, pIn->iCur, - sqlite3VdbeCurrentAddr(v) + 2 + - ((pLoop->wsFlags & WHERE_VIRTUALTABLE)==0) - ); + sqlite3VdbeCurrentAddr(v) + 2 + bEarlyOut); VdbeCoverage(v); } - if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 ){ + if( bEarlyOut ){ sqlite3VdbeAddOp4Int(v, OP_IfNoHope, pLevel->iIdxCur, sqlite3VdbeCurrentAddr(v)+2, pIn->iBase, pIn->nPrefix); VdbeCoverage(v); } Index: src/whereInt.h ================================================================== --- src/whereInt.h +++ src/whereInt.h @@ -617,7 +617,8 @@ #define WHERE_SKIPSCAN 0x00008000 /* Uses the skip-scan algorithm */ #define WHERE_UNQ_WANTED 0x00010000 /* WHERE_ONEROW would have been helpful*/ #define WHERE_PARTIALIDX 0x00020000 /* The automatic index is partial */ #define WHERE_IN_EARLYOUT 0x00040000 /* Perhaps quit IN loops early */ #define WHERE_BIGNULL_SORT 0x00080000 /* Column nEq of index is BIGNULL */ +#define WHERE_IN_SEEKSCAN 0x00100000 /* Seek-scan optimization for IN */ #endif /* !defined(SQLITE_WHEREINT_H) */ Index: src/wherecode.c ================================================================== --- src/wherecode.c +++ src/wherecode.c @@ -568,11 +568,11 @@ pLoop->wsFlags |= WHERE_IN_ABLE; if( pLevel->u.in.nIn==0 ){ pLevel->addrNxt = sqlite3VdbeMakeLabel(pParse); } - if( iEq>0 ){ + if( iEq>0 && (pLoop->wsFlags & WHERE_IN_SEEKSCAN)==0 ){ pLoop->wsFlags |= WHERE_IN_EARLYOUT; } i = pLevel->u.in.nIn; pLevel->u.in.nIn += nEq; @@ -606,11 +606,11 @@ pIn->eEndLoopOp = OP_Noop; } pIn++; } } - if( iEq>0 ){ + if( iEq>0 && (pLoop->wsFlags & WHERE_IN_SEEKSCAN)==0 ){ sqlite3VdbeAddOp3(v, OP_SeekHit, pLevel->iIdxCur, 0, iEq); } }else{ pLevel->u.in.nIn = 0; } @@ -1802,10 +1802,24 @@ VdbeComment((v, "NULL-scan pass ctr")); } op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev]; assert( op!=0 ); + if( (pLoop->wsFlags & WHERE_IN_SEEKSCAN)!=0 ){ + assert( op==OP_SeekGE ); + assert( regBignull==0 ); + /* TUNING: The OP_SeekScan opcode seeks to reduce the number + ** of expensive seek operations by replacing a single seek with + ** 1 or more step operations. The question is, how many steps + ** should we try before giving up and going with a seek. The cost + ** of a seek is proportional to the logarithm of the of the number + ** of entries in the tree, so basing the number of steps to try + ** on the estimated number of rows in the btree seems like a good + ** guess. */ + sqlite3VdbeAddOp1(v, OP_SeekScan, (pIdx->aiRowLogEst[0]+9)/10); + VdbeCoverage(v); + } sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); VdbeCoverage(v); VdbeCoverageIf(v, op==OP_Rewind); testcase( op==OP_Rewind ); VdbeCoverageIf(v, op==OP_Last); testcase( op==OP_Last ); VdbeCoverageIf(v, op==OP_SeekGT); testcase( op==OP_SeekGT ); @@ -1904,11 +1918,11 @@ testcase( op==OP_IdxGE ); VdbeCoverageIf(v, op==OP_IdxGE ); testcase( op==OP_IdxLT ); VdbeCoverageIf(v, op==OP_IdxLT ); testcase( op==OP_IdxLE ); VdbeCoverageIf(v, op==OP_IdxLE ); } - if( pLoop->wsFlags & WHERE_IN_EARLYOUT ){ + if( (pLoop->wsFlags & WHERE_IN_EARLYOUT)!=0 ){ sqlite3VdbeAddOp3(v, OP_SeekHit, iIdxCur, nEq, nEq); } /* Seek the table cursor, if required */ omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY)!=0