/ Check-in [da591985]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enable the b-tree cursor object's overflow page-number cache, which is normally enabled only for incr-blob cursors, for all cursors.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | overflow-pgno-cache
Files: files | file ages | folders
SHA1: da59198505990a4fe832be7932117c7e014955b7
User & Date: dan 2014-03-11 20:33:04
Original Comment: Enable the b-tree cursor objects overflow page-number cache, which is normally enabled only for incr-blob cursors, for all cursors.
References
2017-01-27
00:31
Simplify the accessPayload() routine so that it always populates the overflow page cache. In the one case where populating the page cache can lead to problems, simply invalidate the cache as soon as accessPayload() returns. This simplification reduces code size and helps accessPayload() to run a little faster. This backs out the eOp==2 mode of accessPayload() added by check-in [da59198505]. check-in: 68e7a8c6 user: drh tags: trunk
Context
2014-03-11
23:40
Combine the various boolean fields of the BtCursor object into a single bit-vector. This allows setting or clearing more than one boolean at a time and makes the overflow-pgno-cache branch faster than trunk on speedtest1. check-in: 968fec44 user: drh tags: overflow-pgno-cache
20:33
Enable the b-tree cursor object's overflow page-number cache, which is normally enabled only for incr-blob cursors, for all cursors. check-in: da591985 user: dan tags: overflow-pgno-cache
15:27
Version 3.8.4.1 check-in: 018d317b user: drh tags: trunk, release, version-3.8.4.1
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/btree.c.

   442    442   */
   443    443   #ifdef SQLITE_DEBUG
   444    444   static int cursorHoldsMutex(BtCursor *p){
   445    445     return sqlite3_mutex_held(p->pBt->mutex);
   446    446   }
   447    447   #endif
   448    448   
   449         -
   450         -#ifndef SQLITE_OMIT_INCRBLOB
   451    449   /*
   452         -** Invalidate the overflow page-list cache for cursor pCur, if any.
          450  +** Invalidate the overflow cache of the cursor passed as the first argument.
          451  +** Only the validity flag is cleared; the aOverflow[] allocation is retained.
   453    452   */
   454         -static void invalidateOverflowCache(BtCursor *pCur){
   455         -  assert( cursorHoldsMutex(pCur) );
   456         -  sqlite3_free(pCur->aOverflow);
   457         -  pCur->aOverflow = 0;
   458         -}
          453  +#define invalidateOverflowCache(pCur) (pCur->bOvflValid = 0)
   459    454   
   460    455   /*
   461    456   ** Invalidate the overflow page-list cache for all cursors opened
   462    457   ** on the shared btree structure pBt.
   463    458   */
   464    459   static void invalidateAllOverflowCache(BtShared *pBt){
   465    460     BtCursor *p;
   466    461     assert( sqlite3_mutex_held(pBt->mutex) );
   467    462     for(p=pBt->pCursor; p; p=p->pNext){
   468    463       invalidateOverflowCache(p);
   469    464     }
   470    465   }
   471    466   
          467  +#ifndef SQLITE_OMIT_INCRBLOB
   472    468   /*
   473    469   ** This function is called before modifying the contents of a table
   474    470   ** to invalidate any incrblob cursors that are open on the
   475    471   ** row or one of the rows being modified.
   476    472   **
   477    473   ** If argument isClearTable is true, then the entire contents of the
   478    474   ** table is about to be deleted. In this case invalidate all incrblob
................................................................................
   494    490       if( p->isIncrblobHandle && (isClearTable || p->info.nKey==iRow) ){
   495    491         p->eState = CURSOR_INVALID;
   496    492       }
   497    493     }
   498    494   }
   499    495   
   500    496   #else
   501         -  /* Stub functions when INCRBLOB is omitted */
   502         -  #define invalidateOverflowCache(x)
   503         -  #define invalidateAllOverflowCache(x)
          497  +  /* Stub function when INCRBLOB is omitted */
   504    498     #define invalidateIncrblobCursors(x,y,z)
   505    499   #endif /* SQLITE_OMIT_INCRBLOB */
   506    500   
   507    501   /*
   508    502   ** Set bit pgno of the BtShared.pHasContent bitvec. This is called 
   509    503   ** when a page that previously contained data becomes a free-list leaf 
   510    504   ** page.
................................................................................
  3690   3684       if( pCur->pNext ){
  3691   3685         pCur->pNext->pPrev = pCur->pPrev;
  3692   3686       }
  3693   3687       for(i=0; i<=pCur->iPage; i++){
  3694   3688         releasePage(pCur->apPage[i]);
  3695   3689       }
  3696   3690       unlockBtreeIfUnused(pBt);
  3697         -    invalidateOverflowCache(pCur);
         3691  +    sqlite3DbFree(pBtree->db, pCur->aOverflow);
  3698   3692       /* sqlite3_free(pCur); */
  3699   3693       sqlite3BtreeLeave(pBtree);
  3700   3694     }
  3701   3695     return SQLITE_OK;
  3702   3696   }
  3703   3697   
  3704   3698   /*
................................................................................
  3911   3905       memcpy(pBuf, pPayload, nByte);
  3912   3906     }
  3913   3907     return SQLITE_OK;
  3914   3908   }
  3915   3909   
  3916   3910   /*
  3917   3911   ** This function is used to read or overwrite payload information
  3918         -** for the entry that the pCur cursor is pointing to. If the eOp
  3919         -** parameter is 0, this is a read operation (data copied into
  3920         -** buffer pBuf). If it is non-zero, a write (data copied from
  3921         -** buffer pBuf).
         3912  +** for the entry that the pCur cursor is pointing to. The eOp
         3913  +** argument is interpreted as follows:
         3914  +**
         3915  +**   0: The operation is a read. Populate the overflow cache.
         3916  +**   1: The operation is a write. Populate the overflow cache.
         3917  +**   2: The operation is a read. Do not populate the overflow cache.
  3922   3918   **
  3923   3919   ** A total of "amt" bytes are read or written beginning at "offset".
  3924   3920   ** Data is read to or from the buffer pBuf.
  3925   3921   **
  3926   3922   ** The content being read or written might appear on the main page
  3927   3923   ** or be scattered out on multiple overflow pages.
  3928   3924   **
  3929         -** If the BtCursor.isIncrblobHandle flag is set, and the current
  3930         -** cursor entry uses one or more overflow pages, this function
  3931         -** allocates space for and lazily popluates the overflow page-list 
  3932         -** cache array (BtCursor.aOverflow). Subsequent calls use this
  3933         -** cache to make seeking to the supplied offset more efficient.
         3925  +** If the current cursor entry uses one or more overflow pages and the
         3926  +** eOp argument is not 2, this function may allocate space for and lazily 
         3927  +** populate the overflow page-list cache array (BtCursor.aOverflow). 
         3928  +** Subsequent calls use this cache to make seeking to the supplied offset 
         3929  +** more efficient.
  3934   3930   **
  3935   3931   ** Once an overflow page-list cache has been allocated, it may be
  3936   3932   ** invalidated if some other cursor writes to the same table, or if
  3937   3933   ** the cursor is moved to a different row. Additionally, in auto-vacuum
  3938   3934   ** mode, the following events may invalidate an overflow page-list cache.
  3939   3935   **
  3940   3936   **   * An incremental vacuum,
................................................................................
  3973   3969   
  3974   3970     /* Check if data must be read/written to/from the btree page itself. */
  3975   3971     if( offset<pCur->info.nLocal ){
  3976   3972       int a = amt;
  3977   3973       if( a+offset>pCur->info.nLocal ){
  3978   3974         a = pCur->info.nLocal - offset;
  3979   3975       }
  3980         -    rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage);
         3976  +    rc = copyPayload(&aPayload[offset], pBuf, a, (eOp & 0x01), pPage->pDbPage);
  3981   3977       offset = 0;
  3982   3978       pBuf += a;
  3983   3979       amt -= a;
  3984   3980     }else{
  3985   3981       offset -= pCur->info.nLocal;
  3986   3982     }
  3987   3983   
  3988   3984     if( rc==SQLITE_OK && amt>0 ){
  3989   3985       const u32 ovflSize = pBt->usableSize - 4;  /* Bytes content per ovfl page */
  3990   3986       Pgno nextPage;
  3991   3987   
  3992   3988       nextPage = get4byte(&aPayload[pCur->info.nLocal]);
  3993   3989   
  3994         -#ifndef SQLITE_OMIT_INCRBLOB
  3995   3990       /* If eOp is not 2 and the overflow page-list cache is not valid,
  3996   3991       ** grow BtCursor.aOverflow[] if required and zero it. The array has at
  3997   3992       ** least one entry for each overflow page in the overflow chain. The
  3998   3993       ** page number of the first overflow page is stored in aOverflow[0],
  3999   3994       ** etc. A value of 0 in the aOverflow[] array means "not yet known"
  4000   3995       ** (the cache is lazily populated).
  4001   3996       */
  4002         -    if( pCur->isIncrblobHandle && !pCur->aOverflow ){
         3997  +    if( eOp!=2 && !pCur->bOvflValid ){
  4003   3998         int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize;
  4004         -      pCur->aOverflow = (Pgno *)sqlite3MallocZero(sizeof(Pgno)*nOvfl);
  4005         -      /* nOvfl is always positive.  If it were zero, fetchPayload would have
  4006         -      ** been used instead of this routine. */
  4007         -      if( ALWAYS(nOvfl) && !pCur->aOverflow ){
  4008         -        rc = SQLITE_NOMEM;
         3999  +      if( nOvfl>pCur->nOvflAlloc ){
         4000  +        Pgno *aNew = (Pgno*)sqlite3DbRealloc(
         4001  +            pCur->pBtree->db, pCur->aOverflow, nOvfl*2*sizeof(Pgno)
         4002  +        );
         4003  +        if( aNew==0 ){
         4004  +          rc = SQLITE_NOMEM;
         4005  +        }else{
         4006  +          pCur->nOvflAlloc = nOvfl*2;
         4007  +          pCur->aOverflow = aNew;
         4008  +        }
         4009  +      }
         4010  +      if( rc==SQLITE_OK ){
         4011  +        memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno));
         4012  +        pCur->bOvflValid = 1;
  4009   4013         }
  4010   4014       }
  4011   4015   
  4012   4016       /* If the overflow page-list cache has been allocated and the
  4013   4017       ** entry for the first required overflow page is valid, skip
  4014   4018       ** directly to it.
  4015   4019       */
  4016         -    if( pCur->aOverflow && pCur->aOverflow[offset/ovflSize] ){
         4020  +    if( pCur->bOvflValid && pCur->aOverflow[offset/ovflSize] ){
  4017   4021         iIdx = (offset/ovflSize);
  4018   4022         nextPage = pCur->aOverflow[iIdx];
  4019   4023         offset = (offset%ovflSize);
  4020   4024       }
  4021         -#endif
  4022   4025   
  4023   4026       for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){
  4024   4027   
  4025         -#ifndef SQLITE_OMIT_INCRBLOB
  4026   4028         /* If required, populate the overflow page-list cache. */
  4027         -      if( pCur->aOverflow ){
         4029  +      if( pCur->bOvflValid ){
  4028   4030           assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage);
  4029   4031           pCur->aOverflow[iIdx] = nextPage;
  4030   4032         }
  4031         -#endif
  4032   4033   
  4033   4034         if( offset>=ovflSize ){
  4034   4035           /* The only reason to read this page is to obtain the page
  4035   4036           ** number for the next page in the overflow chain. The page
  4036   4037           ** data is not required. So first try to lookup the overflow
  4037   4038           ** page-list cache, if any, then fall back to the getOverflowPage()
  4038   4039           ** function.
  4039   4040           */
  4040         -#ifndef SQLITE_OMIT_INCRBLOB
  4041         -        if( pCur->aOverflow && pCur->aOverflow[iIdx+1] ){
         4041  +        if( pCur->bOvflValid && pCur->aOverflow[iIdx+1] ){
  4042   4042             nextPage = pCur->aOverflow[iIdx+1];
  4043   4043           } else 
  4044         -#endif
  4045   4044             rc = getOverflowPage(pBt, nextPage, 0, &nextPage);
  4046   4045           offset -= ovflSize;
  4047   4046         }else{
  4048   4047           /* Need to read this page properly. It contains some of the
  4049   4048           ** range of data that is being read (eOp==0 or 2) or written (eOp==1).
  4050   4049           */
  4051   4050   #ifdef SQLITE_DIRECT_OVERFLOW_READ
................................................................................
  4065   4064           **   4) there is no open write-transaction, and
  4066   4065           **   5) the database is not a WAL database,
  4067   4066           **
  4068   4067           ** then data can be read directly from the database file into the
  4069   4068           ** output buffer, bypassing the page-cache altogether. This speeds
  4070   4069           ** up loading large records that span many overflow pages.
  4071   4070           */
  4072         -        if( eOp==0                                             /* (1) */
         4071  +        if( (eOp&0x01)==0                                      /* (1) */
  4073   4072            && offset==0                                          /* (2) */
  4074   4073            && pBt->inTransaction==TRANS_READ                     /* (4) */
  4075   4074            && (fd = sqlite3PagerFile(pBt->pPager))->pMethods     /* (3) */
  4076   4075            && pBt->pPage1->aData[19]==0x01                       /* (5) */
  4077   4076           ){
  4078   4077             u8 aSave[4];
  4079   4078             u8 *aWrite = &pBuf[-4];
................................................................................
  4083   4082             memcpy(aWrite, aSave, 4);
  4084   4083           }else
  4085   4084   #endif
  4086   4085   
  4087   4086           {
  4088   4087             DbPage *pDbPage;
  4089   4088             rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage,
  4090         -              (eOp==0 ? PAGER_GET_READONLY : 0)
         4089  +              ((eOp&0x01)==0 ? PAGER_GET_READONLY : 0)
  4091   4090             );
  4092   4091             if( rc==SQLITE_OK ){
  4093   4092               aPayload = sqlite3PagerGetData(pDbPage);
  4094   4093               nextPage = get4byte(aPayload);
  4095         -            rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
         4094  +            rc = copyPayload(&aPayload[offset+4], pBuf, a, (eOp&0x01), pDbPage);
  4096   4095               sqlite3PagerUnref(pDbPage);
  4097   4096               offset = 0;
  4098   4097             }
  4099   4098           }
  4100   4099           amt -= a;
  4101   4100           pBuf += a;
  4102   4101         }
................................................................................
  4333   4332     MemPage *pRoot;
  4334   4333     int rc = SQLITE_OK;
  4335   4334   
  4336   4335     assert( cursorHoldsMutex(pCur) );
  4337   4336     assert( CURSOR_INVALID < CURSOR_REQUIRESEEK );
  4338   4337     assert( CURSOR_VALID   < CURSOR_REQUIRESEEK );
  4339   4338     assert( CURSOR_FAULT   > CURSOR_REQUIRESEEK );
         4339  +  invalidateOverflowCache(pCur);
  4340   4340     if( pCur->eState>=CURSOR_REQUIRESEEK ){
  4341   4341       if( pCur->eState==CURSOR_FAULT ){
  4342   4342         assert( pCur->skipNext!=SQLITE_OK );
  4343   4343         return pCur->skipNext;
  4344   4344       }
  4345   4345       sqlite3BtreeClearCursor(pCur);
  4346   4346     }
................................................................................
  4682   4682             nCell = (int)pCur->info.nKey;
  4683   4683             pCellKey = sqlite3Malloc( nCell );
  4684   4684             if( pCellKey==0 ){
  4685   4685               rc = SQLITE_NOMEM;
  4686   4686               goto moveto_finish;
  4687   4687             }
  4688   4688             pCur->aiIdx[pCur->iPage] = (u16)idx;
  4689         -          rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0);
         4689  +          rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 2);
  4690   4690             if( rc ){
  4691   4691               sqlite3_free(pCellKey);
  4692   4692               goto moveto_finish;
  4693   4693             }
  4694   4694             c = xRecordCompare(nCell, pCellKey, pIdxKey, 0);
  4695   4695             sqlite3_free(pCellKey);
  4696   4696           }
................................................................................
  4771   4771     int idx;
  4772   4772     MemPage *pPage;
  4773   4773   
  4774   4774     assert( cursorHoldsMutex(pCur) );
  4775   4775     assert( pRes!=0 );
  4776   4776     assert( *pRes==0 || *pRes==1 );
  4777   4777     assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
         4778  +  invalidateOverflowCache(pCur);
  4778   4779     if( pCur->eState!=CURSOR_VALID ){
  4779   4780       rc = restoreCursorPosition(pCur);
  4780   4781       if( rc!=SQLITE_OK ){
  4781   4782         *pRes = 0;
  4782   4783         return rc;
  4783   4784       }
  4784   4785       if( CURSOR_INVALID==pCur->eState ){
................................................................................
  4866   4867     int rc;
  4867   4868     MemPage *pPage;
  4868   4869   
  4869   4870     assert( cursorHoldsMutex(pCur) );
  4870   4871     assert( pRes!=0 );
  4871   4872     assert( *pRes==0 || *pRes==1 );
  4872   4873     assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
         4874  +  invalidateOverflowCache(pCur);
  4873   4875     pCur->atLast = 0;
  4874   4876     if( pCur->eState!=CURSOR_VALID ){
  4875   4877       if( ALWAYS(pCur->eState>=CURSOR_REQUIRESEEK) ){
  4876   4878         rc = btreeRestoreCursorPosition(pCur);
  4877   4879         if( rc!=SQLITE_OK ){
  4878   4880           *pRes = 0;
  4879   4881           return rc;
................................................................................
  8420   8422     assert( !hasReadConflicts(pCsr->pBtree, pCsr->pgnoRoot) );
  8421   8423     assert( pCsr->apPage[pCsr->iPage]->intKey );
  8422   8424   
  8423   8425     return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1);
  8424   8426   }
  8425   8427   
  8426   8428   /* 
  8427         -** Set a flag on this cursor to cache the locations of pages from the 
  8428         -** overflow list for the current row. This is used by cursors opened
  8429         -** for incremental blob IO only.
  8430         -**
  8431         -** This function sets a flag only. The actual page location cache
  8432         -** (stored in BtCursor.aOverflow[]) is allocated and used by function
  8433         -** accessPayload() (the worker function for sqlite3BtreeData() and
  8434         -** sqlite3BtreePutData()).
         8429  +** Mark this cursor as an incremental blob cursor.
  8435   8430   */
  8436         -void sqlite3BtreeCacheOverflow(BtCursor *pCur){
  8437         -  assert( cursorHoldsMutex(pCur) );
  8438         -  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
  8439         -  invalidateOverflowCache(pCur);
         8431  +void sqlite3BtreeIncrblobCursor(BtCursor *pCur){
  8440   8432     pCur->isIncrblobHandle = 1;
  8441   8433   }
  8442   8434   #endif
  8443   8435   
  8444   8436   /*
  8445   8437   ** Set both the "read version" (single byte at byte offset 18) and 
  8446   8438   ** "write version" (single byte at byte offset 19) fields in the database

Changes to src/btree.h.

   183    183   int sqlite3BtreeDataSize(BtCursor*, u32 *pSize);
   184    184   int sqlite3BtreeData(BtCursor*, u32 offset, u32 amt, void*);
   185    185   
   186    186   char *sqlite3BtreeIntegrityCheck(Btree*, int *aRoot, int nRoot, int, int*);
   187    187   struct Pager *sqlite3BtreePager(Btree*);
   188    188   
   189    189   int sqlite3BtreePutData(BtCursor*, u32 offset, u32 amt, void*);
   190         -void sqlite3BtreeCacheOverflow(BtCursor *);
          190  +void sqlite3BtreeIncrblobCursor(BtCursor *);
   191    191   void sqlite3BtreeClearCursor(BtCursor *);
   192    192   int sqlite3BtreeSetVersion(Btree *pBt, int iVersion);
   193    193   void sqlite3BtreeCursorHints(BtCursor *, unsigned int mask);
   194    194   
   195    195   #ifndef NDEBUG
   196    196   int sqlite3BtreeCursorIsValid(BtCursor*);
   197    197   #endif

Changes to src/btreeInt.h.

   493    493   */
   494    494   struct BtCursor {
   495    495     Btree *pBtree;            /* The Btree to which this cursor belongs */
   496    496     BtShared *pBt;            /* The BtShared this cursor points to */
   497    497     BtCursor *pNext, *pPrev;  /* Forms a linked list of all cursors */
   498    498     struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */
   499    499   #ifndef SQLITE_OMIT_INCRBLOB
          500  +  int nOvflAlloc;           /* Allocated size of aOverflow[] array */
          501  +  u8 bOvflValid;            /* True if size and contents of aOverflow[] valid */
   500    502     Pgno *aOverflow;          /* Cache of overflow page locations */
   501    503   #endif
   502    504     Pgno pgnoRoot;            /* The root page of this tree */
   503    505     CellInfo info;            /* A parse of the cell we are pointing at */
   504    506     i64 nKey;        /* Size of pKey, or last integer key */
   505    507     void *pKey;      /* Saved key that was cursor's last known position */
   506    508     int skipNext;    /* Prev() is noop if negative. Next() is noop if positive */

Changes to src/vdbeblob.c.

    73     73         rc = SQLITE_ERROR;
    74     74         sqlite3_finalize(p->pStmt);
    75     75         p->pStmt = 0;
    76     76       }else{
    77     77         p->iOffset = pC->aType[p->iCol + pC->nField];
    78     78         p->nByte = sqlite3VdbeSerialTypeLen(type);
    79     79         p->pCsr =  pC->pCursor;
    80         -      sqlite3BtreeEnterCursor(p->pCsr);
    81         -      sqlite3BtreeCacheOverflow(p->pCsr);
    82         -      sqlite3BtreeLeaveCursor(p->pCsr);
           80  +      sqlite3BtreeIncrblobCursor(p->pCsr);
    83     81       }
    84     82     }
    85     83   
    86     84     if( rc==SQLITE_ROW ){
    87     85       rc = SQLITE_OK;
    88     86     }else if( p->pStmt ){
    89     87       rc = sqlite3_finalize(p->pStmt);