/ Check-in [b387e2f9]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Allow read-only cursors to use mmap pages even if there is an open write transaction.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | experimental-mmap
Files: files | file ages | folders
SHA1: b387e2f9d24dccac1fd040e309f6fc7ec1cfffba
User & Date: dan 2013-03-15 18:29:18
Context
2013-03-15
19:13
Fix a dropped error code in pager.c. check-in: 022fdc98 user: dan tags: experimental-mmap
18:29
Allow read-only cursors to use mmap pages even if there is an open write transaction. check-in: b387e2f9 user: dan tags: experimental-mmap
2013-03-14
18:34
Use mmap() to read from the database file in rollback mode. This branch is unix only for now. check-in: 6f21d9cb user: dan tags: experimental-mmap
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/btree.c.

  1565   1565   ** means we have started to be concerned about content and the disk
  1566   1566   ** read should occur at that point.
  1567   1567   */
  1568   1568   static int btreeGetPage(
  1569   1569     BtShared *pBt,       /* The btree */
  1570   1570     Pgno pgno,           /* Number of the page to fetch */
  1571   1571     MemPage **ppPage,    /* Return the page in this parameter */
  1572         -  int noContent        /* Do not load page content if true */
         1572  +  int noContent,       /* Do not load page content if true */
         1573  +  int bReadonly        /* True if a read-only (mmap) page is ok */
  1573   1574   ){
  1574   1575     int rc;
  1575   1576     DbPage *pDbPage;
         1577  +  int flags = (noContent ? PAGER_ACQUIRE_NOCONTENT : 0) 
         1578  +            | (bReadonly ? PAGER_ACQUIRE_READONLY : 0);
  1576   1579   
         1580  +  assert( noContent==0 || bReadonly==0 );
  1577   1581     assert( sqlite3_mutex_held(pBt->mutex) );
  1578         -  rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent);
         1582  +  rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags);
  1579   1583     if( rc ) return rc;
  1580   1584     *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt);
  1581   1585     return SQLITE_OK;
  1582   1586   }
  1583   1587   
  1584   1588   /*
  1585   1589   ** Retrieve a page from the pager cache. If the requested page is not
................................................................................
  1614   1618   ** convenience wrapper around separate calls to btreeGetPage() and 
  1615   1619   ** btreeInitPage().
  1616   1620   **
  1617   1621   ** If an error occurs, then the value *ppPage is set to is undefined. It
  1618   1622   ** may remain unchanged, or it may be set to an invalid value.
  1619   1623   */
  1620   1624   static int getAndInitPage(
  1621         -  BtShared *pBt,          /* The database file */
  1622         -  Pgno pgno,           /* Number of the page to get */
  1623         -  MemPage **ppPage     /* Write the page pointer here */
         1625  +  BtShared *pBt,                  /* The database file */
         1626  +  Pgno pgno,                      /* Number of the page to get */
         1627  +  MemPage **ppPage,               /* Write the page pointer here */
         1628  +  int bReadonly                   /* True if a read-only (mmap) page is ok */
  1624   1629   ){
  1625   1630     int rc;
  1626   1631     assert( sqlite3_mutex_held(pBt->mutex) );
  1627   1632   
  1628   1633     if( pgno>btreePagecount(pBt) ){
  1629   1634       rc = SQLITE_CORRUPT_BKPT;
  1630   1635     }else{
  1631         -    rc = btreeGetPage(pBt, pgno, ppPage, 0);
         1636  +    rc = btreeGetPage(pBt, pgno, ppPage, 0, bReadonly);
  1632   1637       if( rc==SQLITE_OK ){
  1633   1638         rc = btreeInitPage(*ppPage);
  1634   1639         if( rc!=SQLITE_OK ){
  1635   1640           releasePage(*ppPage);
  1636   1641         }
  1637   1642       }
  1638   1643     }
................................................................................
  2346   2351     int nPageFile = 0;   /* Number of pages in the database file */
  2347   2352     int nPageHeader;     /* Number of pages in the database according to hdr */
  2348   2353   
  2349   2354     assert( sqlite3_mutex_held(pBt->mutex) );
  2350   2355     assert( pBt->pPage1==0 );
  2351   2356     rc = sqlite3PagerSharedLock(pBt->pPager);
  2352   2357     if( rc!=SQLITE_OK ) return rc;
  2353         -  rc = btreeGetPage(pBt, 1, &pPage1, 0);
         2358  +  rc = btreeGetPage(pBt, 1, &pPage1, 0, 0);
  2354   2359     if( rc!=SQLITE_OK ) return rc;
  2355   2360   
  2356   2361     /* Do some checking to help ensure the file we opened really is
  2357   2362     ** a valid database file. 
  2358   2363     */
  2359   2364     nPage = nPageHeader = get4byte(28+(u8*)pPage1->aData);
  2360   2365     sqlite3PagerPagecount(pBt->pPager, &nPageFile);
................................................................................
  2561   2566     p->pBt->nPage = 0;
  2562   2567     rc = newDatabase(p->pBt);
  2563   2568     sqlite3BtreeLeave(p);
  2564   2569     return rc;
  2565   2570   }
  2566   2571   
  2567   2572   /*
  2568         -** If the shared-btree passed as the only argument is holding references
  2569         -** to mmap pages, replace them with read/write pages. Return SQLITE_OK
  2570         -** if successful, or an error code otherwise.
         2573  +** Ensure that any root page references held by open cursors are not
         2574  +** mmap pages.
  2571   2575   */
  2572   2576   static int btreeSwapOutMmap(BtShared *pBt){
  2573         -  BtCursor *pCsr;
  2574         -  for(pCsr=pBt->pCursor; pCsr; pCsr=pCsr->pNext){
  2575         -    int i;
  2576         -    for(i=0; i<=pCsr->iPage; i++){
  2577         -      MemPage *pPg = pCsr->apPage[i];
         2577  +  int rc = SQLITE_OK;             /* Return code */
         2578  +  BtCursor *pCsr;                 /* Used to iterate through all open cursors */
         2579  +
         2580  +  for(pCsr=pBt->pCursor; pCsr && rc==SQLITE_OK; pCsr=pCsr->pNext){
         2581  +    if( pCsr->iPage>=0 ){
         2582  +      MemPage *pPg = pCsr->apPage[0];
  2578   2583         if( pPg->pDbPage->flags & PGHDR_MMAP ){
  2579         -        int rc;
  2580   2584           MemPage *pNew = 0;
  2581         -        rc = btreeGetPage(pBt, pPg->pgno, &pNew, 0);
  2582         -        if( rc==SQLITE_OK && i==pCsr->iPage ){
         2585  +        rc = btreeGetPage(pBt, pPg->pgno, &pNew, 0, 0);
         2586  +        if( rc==SQLITE_OK && pCsr->iPage==0 ){
  2583   2587             pCsr->info.pCell = pNew->aData + (pCsr->info.pCell - pPg->aData);
  2584   2588           }
  2585         -        pCsr->apPage[i] = pNew;
         2589  +        pCsr->apPage[0] = pNew;
  2586   2590           releasePage(pPg);
  2587   2591           if( rc!=SQLITE_OK ) return rc;
  2588   2592         }
  2589   2593       }
  2590   2594     }
  2591   2595   
  2592         -  return SQLITE_OK;
         2596  +  return rc;
  2593   2597   }
  2594   2598   
  2595   2599   /*
  2596   2600   ** Attempt to start a new transaction. A write-transaction
  2597   2601   ** is started if the second argument is nonzero, otherwise a read-
  2598   2602   ** transaction.  If the second argument is 2 or more an exclusive
  2599   2603   ** transaction is started, meaning that no other process is allowed
................................................................................
  2936   2940     }
  2937   2941   
  2938   2942     /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
  2939   2943     ** that it points at iFreePage. Also fix the pointer map entry for
  2940   2944     ** iPtrPage.
  2941   2945     */
  2942   2946     if( eType!=PTRMAP_ROOTPAGE ){
  2943         -    rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
         2947  +    rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0, 0);
  2944   2948       if( rc!=SQLITE_OK ){
  2945   2949         return rc;
  2946   2950       }
  2947   2951       rc = sqlite3PagerWrite(pPtrPage->pDbPage);
  2948   2952       if( rc!=SQLITE_OK ){
  2949   2953         releasePage(pPtrPage);
  2950   2954         return rc;
................................................................................
  3020   3024         }
  3021   3025       } else {
  3022   3026         Pgno iFreePg;             /* Index of free page to move pLastPg to */
  3023   3027         MemPage *pLastPg;
  3024   3028         u8 eMode = BTALLOC_ANY;   /* Mode parameter for allocateBtreePage() */
  3025   3029         Pgno iNear = 0;           /* nearby parameter for allocateBtreePage() */
  3026   3030   
  3027         -      rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0);
         3031  +      rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0, 0);
  3028   3032         if( rc!=SQLITE_OK ){
  3029   3033           return rc;
  3030   3034         }
  3031   3035   
  3032   3036         /* If bCommit is zero, this loop runs exactly once and page pLastPg
  3033   3037         ** is swapped with the first free page pulled off the free list.
  3034   3038         **
................................................................................
  3112   3116       Pgno nOrig = btreePagecount(pBt);
  3113   3117       Pgno nFree = get4byte(&pBt->pPage1->aData[36]);
  3114   3118       Pgno nFin = finalDbSize(pBt, nOrig, nFree);
  3115   3119   
  3116   3120       if( nOrig<nFin ){
  3117   3121         rc = SQLITE_CORRUPT_BKPT;
  3118   3122       }else if( nFree>0 ){
  3119         -      invalidateAllOverflowCache(pBt);
  3120         -      rc = incrVacuumStep(pBt, nFin, nOrig, 0);
         3123  +      rc = saveAllCursors(pBt, 0, 0);
         3124  +      if( rc==SQLITE_OK ){
         3125  +        invalidateAllOverflowCache(pBt);
         3126  +        rc = incrVacuumStep(pBt, nFin, nOrig, 0);
         3127  +      }
  3121   3128         if( rc==SQLITE_OK ){
  3122   3129           rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
  3123   3130           put4byte(&pBt->pPage1->aData[28], pBt->nPage);
  3124   3131         }
  3125   3132       }else{
  3126   3133         rc = SQLITE_DONE;
  3127   3134       }
................................................................................
  3434   3441       if( rc2!=SQLITE_OK ){
  3435   3442         rc = rc2;
  3436   3443       }
  3437   3444   
  3438   3445       /* The rollback may have destroyed the pPage1->aData value.  So
  3439   3446       ** call btreeGetPage() on page 1 again to make
  3440   3447       ** sure pPage1->aData is set correctly. */
  3441         -    if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
         3448  +    if( btreeGetPage(pBt, 1, &pPage1, 0, 0)==SQLITE_OK ){
  3442   3449         int nPage = get4byte(28+(u8*)pPage1->aData);
  3443   3450         testcase( nPage==0 );
  3444   3451         if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage);
  3445   3452         testcase( pBt->nPage!=nPage );
  3446   3453         pBt->nPage = nPage;
  3447   3454         releasePage(pPage1);
  3448   3455       }
................................................................................
  3868   3875         }
  3869   3876       }
  3870   3877     }
  3871   3878   #endif
  3872   3879   
  3873   3880     assert( next==0 || rc==SQLITE_DONE );
  3874   3881     if( rc==SQLITE_OK ){
  3875         -    rc = btreeGetPage(pBt, ovfl, &pPage, 0);
         3882  +    rc = btreeGetPage(pBt, ovfl, &pPage, 0, (ppPage==0));
  3876   3883       assert( rc==SQLITE_OK || pPage==0 );
  3877   3884       if( rc==SQLITE_OK ){
  3878   3885         next = get4byte(pPage->aData);
  3879   3886       }
  3880   3887     }
  3881   3888   
  3882   3889     *pPgnoNext = next;
................................................................................
  4089   4096             nextPage = get4byte(aWrite);
  4090   4097             memcpy(aWrite, aSave, 4);
  4091   4098           }else
  4092   4099   #endif
  4093   4100   
  4094   4101           {
  4095   4102             DbPage *pDbPage;
  4096         -          rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage);
         4103  +          rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage,
         4104  +              (eOp==0 ? PAGER_ACQUIRE_READONLY : 0)
         4105  +          );
  4097   4106             if( rc==SQLITE_OK ){
  4098   4107               aPayload = sqlite3PagerGetData(pDbPage);
  4099   4108               nextPage = get4byte(aPayload);
  4100   4109               rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
  4101   4110               sqlite3PagerUnref(pDbPage);
  4102   4111               offset = 0;
  4103   4112             }
................................................................................
  4268   4277     int i = pCur->iPage;
  4269   4278     MemPage *pNewPage;
  4270   4279     BtShared *pBt = pCur->pBt;
  4271   4280   
  4272   4281     assert( cursorHoldsMutex(pCur) );
  4273   4282     assert( pCur->eState==CURSOR_VALID );
  4274   4283     assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
         4284  +  assert( pCur->iPage>=0 );
  4275   4285     if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){
  4276   4286       return SQLITE_CORRUPT_BKPT;
  4277   4287     }
  4278         -  rc = getAndInitPage(pBt, newPgno, &pNewPage);
         4288  +  rc = getAndInitPage(pBt, newPgno, &pNewPage, (pCur->wrFlag==0));
  4279   4289     if( rc ) return rc;
  4280   4290     pCur->apPage[i+1] = pNewPage;
  4281   4291     pCur->aiIdx[i+1] = 0;
  4282   4292     pCur->iPage++;
  4283   4293   
  4284   4294     pCur->info.nSize = 0;
  4285   4295     pCur->validNKey = 0;
................................................................................
  4388   4398         releasePage(pCur->apPage[i]);
  4389   4399       }
  4390   4400       pCur->iPage = 0;
  4391   4401     }else if( pCur->pgnoRoot==0 ){
  4392   4402       pCur->eState = CURSOR_INVALID;
  4393   4403       return SQLITE_OK;
  4394   4404     }else{
  4395         -    rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]);
         4405  +    rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0], 0);
  4396   4406       if( rc!=SQLITE_OK ){
  4397   4407         pCur->eState = CURSOR_INVALID;
  4398   4408         return rc;
  4399   4409       }
  4400   4410       pCur->iPage = 0;
  4401   4411   
  4402   4412       /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor
................................................................................
  5002   5012         }else{
  5003   5013           iTrunk = get4byte(&pPage1->aData[32]);
  5004   5014         }
  5005   5015         testcase( iTrunk==mxPage );
  5006   5016         if( iTrunk>mxPage ){
  5007   5017           rc = SQLITE_CORRUPT_BKPT;
  5008   5018         }else{
  5009         -        rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0);
         5019  +        rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0);
  5010   5020         }
  5011   5021         if( rc ){
  5012   5022           pTrunk = 0;
  5013   5023           goto end_allocate_page;
  5014   5024         }
  5015   5025         assert( pTrunk!=0 );
  5016   5026         assert( pTrunk->aData!=0 );
................................................................................
  5066   5076             MemPage *pNewTrunk;
  5067   5077             Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
  5068   5078             if( iNewTrunk>mxPage ){ 
  5069   5079               rc = SQLITE_CORRUPT_BKPT;
  5070   5080               goto end_allocate_page;
  5071   5081             }
  5072   5082             testcase( iNewTrunk==mxPage );
  5073         -          rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0);
         5083  +          rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0, 0);
  5074   5084             if( rc!=SQLITE_OK ){
  5075   5085               goto end_allocate_page;
  5076   5086             }
  5077   5087             rc = sqlite3PagerWrite(pNewTrunk->pDbPage);
  5078   5088             if( rc!=SQLITE_OK ){
  5079   5089               releasePage(pNewTrunk);
  5080   5090               goto end_allocate_page;
................................................................................
  5146   5156             rc = sqlite3PagerWrite(pTrunk->pDbPage);
  5147   5157             if( rc ) goto end_allocate_page;
  5148   5158             if( closest<k-1 ){
  5149   5159               memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
  5150   5160             }
  5151   5161             put4byte(&aData[4], k-1);
  5152   5162             noContent = !btreeGetHasContent(pBt, *pPgno);
  5153         -          rc = btreeGetPage(pBt, *pPgno, ppPage, noContent);
         5163  +          rc = btreeGetPage(pBt, *pPgno, ppPage, noContent, 0);
  5154   5164             if( rc==SQLITE_OK ){
  5155   5165               rc = sqlite3PagerWrite((*ppPage)->pDbPage);
  5156   5166               if( rc!=SQLITE_OK ){
  5157   5167                 releasePage(*ppPage);
  5158   5168               }
  5159   5169             }
  5160   5170             searchList = 0;
................................................................................
  5194   5204         /* If *pPgno refers to a pointer-map page, allocate two new pages
  5195   5205         ** at the end of the file instead of one. The first allocated page
  5196   5206         ** becomes a new pointer-map page, the second is used by the caller.
  5197   5207         */
  5198   5208         MemPage *pPg = 0;
  5199   5209         TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage));
  5200   5210         assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) );
  5201         -      rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent);
         5211  +      rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent, 0);
  5202   5212         if( rc==SQLITE_OK ){
  5203   5213           rc = sqlite3PagerWrite(pPg->pDbPage);
  5204   5214           releasePage(pPg);
  5205   5215         }
  5206   5216         if( rc ) return rc;
  5207   5217         pBt->nPage++;
  5208   5218         if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; }
  5209   5219       }
  5210   5220   #endif
  5211   5221       put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage);
  5212   5222       *pPgno = pBt->nPage;
  5213   5223   
  5214   5224       assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
  5215         -    rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent);
         5225  +    rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent, 0);
  5216   5226       if( rc ) return rc;
  5217   5227       rc = sqlite3PagerWrite((*ppPage)->pDbPage);
  5218   5228       if( rc!=SQLITE_OK ){
  5219   5229         releasePage(*ppPage);
  5220   5230       }
  5221   5231       TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
  5222   5232     }
................................................................................
  5276   5286     nFree = get4byte(&pPage1->aData[36]);
  5277   5287     put4byte(&pPage1->aData[36], nFree+1);
  5278   5288   
  5279   5289     if( pBt->btsFlags & BTS_SECURE_DELETE ){
  5280   5290       /* If the secure_delete option is enabled, then
  5281   5291       ** always fully overwrite deleted information with zeros.
  5282   5292       */
  5283         -    if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) )
         5293  +    if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0, 0))!=0) )
  5284   5294        ||            ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0)
  5285   5295       ){
  5286   5296         goto freepage_out;
  5287   5297       }
  5288   5298       memset(pPage->aData, 0, pPage->pBt->pageSize);
  5289   5299     }
  5290   5300   
................................................................................
  5303   5313     ** first trunk page in the current free-list. This block tests if it
  5304   5314     ** is possible to add the page as a new free-list leaf.
  5305   5315     */
  5306   5316     if( nFree!=0 ){
  5307   5317       u32 nLeaf;                /* Initial number of leaf cells on trunk page */
  5308   5318   
  5309   5319       iTrunk = get4byte(&pPage1->aData[32]);
  5310         -    rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0);
         5320  +    rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0);
  5311   5321       if( rc!=SQLITE_OK ){
  5312   5322         goto freepage_out;
  5313   5323       }
  5314   5324   
  5315   5325       nLeaf = get4byte(&pTrunk->aData[4]);
  5316   5326       assert( pBt->usableSize>32 );
  5317   5327       if( nLeaf > (u32)pBt->usableSize/4 - 2 ){
................................................................................
  5349   5359   
  5350   5360     /* If control flows to this point, then it was not possible to add the
  5351   5361     ** the page being freed as a leaf page of the first trunk in the free-list.
  5352   5362     ** Possibly because the free-list is empty, or possibly because the 
  5353   5363     ** first trunk in the free-list is full. Either way, the page being freed
  5354   5364     ** will become the new first trunk page in the free-list.
  5355   5365     */
  5356         -  if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){
         5366  +  if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0, 0)) ){
  5357   5367       goto freepage_out;
  5358   5368     }
  5359   5369     rc = sqlite3PagerWrite(pPage->pDbPage);
  5360   5370     if( rc!=SQLITE_OK ){
  5361   5371       goto freepage_out;
  5362   5372     }
  5363   5373     put4byte(pPage->aData, iTrunk);
................................................................................
  6150   6160     if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){
  6151   6161       pRight = &pParent->aData[pParent->hdrOffset+8];
  6152   6162     }else{
  6153   6163       pRight = findCell(pParent, i+nxDiv-pParent->nOverflow);
  6154   6164     }
  6155   6165     pgno = get4byte(pRight);
  6156   6166     while( 1 ){
  6157         -    rc = getAndInitPage(pBt, pgno, &apOld[i]);
         6167  +    rc = getAndInitPage(pBt, pgno, &apOld[i], 0);
  6158   6168       if( rc ){
  6159   6169         memset(apOld, 0, (i+1)*sizeof(MemPage*));
  6160   6170         goto balance_cleanup;
  6161   6171       }
  6162   6172       nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
  6163   6173       if( (i--)==0 ) break;
  6164   6174   
................................................................................
  7241   7251         */
  7242   7252         u8 eType = 0;
  7243   7253         Pgno iPtrPage = 0;
  7244   7254   
  7245   7255         releasePage(pPageMove);
  7246   7256   
  7247   7257         /* Move the page currently at pgnoRoot to pgnoMove. */
  7248         -      rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
         7258  +      rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0);
  7249   7259         if( rc!=SQLITE_OK ){
  7250   7260           return rc;
  7251   7261         }
  7252   7262         rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
  7253   7263         if( eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
  7254   7264           rc = SQLITE_CORRUPT_BKPT;
  7255   7265         }
................................................................................
  7262   7272         rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove, 0);
  7263   7273         releasePage(pRoot);
  7264   7274   
  7265   7275         /* Obtain the page at pgnoRoot */
  7266   7276         if( rc!=SQLITE_OK ){
  7267   7277           return rc;
  7268   7278         }
  7269         -      rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
         7279  +      rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0);
  7270   7280         if( rc!=SQLITE_OK ){
  7271   7281           return rc;
  7272   7282         }
  7273   7283         rc = sqlite3PagerWrite(pRoot->pDbPage);
  7274   7284         if( rc!=SQLITE_OK ){
  7275   7285           releasePage(pRoot);
  7276   7286           return rc;
................................................................................
  7338   7348     int i;
  7339   7349   
  7340   7350     assert( sqlite3_mutex_held(pBt->mutex) );
  7341   7351     if( pgno>btreePagecount(pBt) ){
  7342   7352       return SQLITE_CORRUPT_BKPT;
  7343   7353     }
  7344   7354   
  7345         -  rc = getAndInitPage(pBt, pgno, &pPage);
         7355  +  rc = getAndInitPage(pBt, pgno, &pPage, 0);
  7346   7356     if( rc ) return rc;
  7347   7357     for(i=0; i<pPage->nCell; i++){
  7348   7358       pCell = findCell(pPage, i);
  7349   7359       if( !pPage->leaf ){
  7350   7360         rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange);
  7351   7361         if( rc ) goto cleardatabasepage_out;
  7352   7362       }
................................................................................
  7440   7450     ** This error is caught long before control reaches this point.
  7441   7451     */
  7442   7452     if( NEVER(pBt->pCursor) ){
  7443   7453       sqlite3ConnectionBlocked(p->db, pBt->pCursor->pBtree->db);
  7444   7454       return SQLITE_LOCKED_SHAREDCACHE;
  7445   7455     }
  7446   7456   
  7447         -  rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
         7457  +  rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0, 0);
  7448   7458     if( rc ) return rc;
  7449   7459     rc = sqlite3BtreeClearTable(p, iTable, 0);
  7450   7460     if( rc ){
  7451   7461       releasePage(pPage);
  7452   7462       return rc;
  7453   7463     }
  7454   7464   
................................................................................
  7475   7485         }else{
  7476   7486           /* The table being dropped does not have the largest root-page
  7477   7487           ** number in the database. So move the page that does into the 
  7478   7488           ** gap left by the deleted root-page.
  7479   7489           */
  7480   7490           MemPage *pMove;
  7481   7491           releasePage(pPage);
  7482         -        rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
         7492  +        rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0);
  7483   7493           if( rc!=SQLITE_OK ){
  7484   7494             return rc;
  7485   7495           }
  7486   7496           rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0);
  7487   7497           releasePage(pMove);
  7488   7498           if( rc!=SQLITE_OK ){
  7489   7499             return rc;
  7490   7500           }
  7491   7501           pMove = 0;
  7492         -        rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
         7502  +        rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0);
  7493   7503           freePage(pMove, &rc);
  7494   7504           releasePage(pMove);
  7495   7505           if( rc!=SQLITE_OK ){
  7496   7506             return rc;
  7497   7507           }
  7498   7508           *piMoved = maxRootPgno;
  7499   7509         }
................................................................................
  7897   7907   
  7898   7908     /* Check that the page exists
  7899   7909     */
  7900   7910     pBt = pCheck->pBt;
  7901   7911     usableSize = pBt->usableSize;
  7902   7912     if( iPage==0 ) return 0;
  7903   7913     if( checkRef(pCheck, iPage, zParentContext) ) return 0;
  7904         -  if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
         7914  +  if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0, 0))!=0 ){
  7905   7915       checkAppendMsg(pCheck, zContext,
  7906   7916          "unable to get the page. error code=%d", rc);
  7907   7917       return 0;
  7908   7918     }
  7909   7919   
  7910   7920     /* Clear MemPage.isInit to make sure the corruption detection code in
  7911   7921     ** btreeInitPage() is executed.  */

Changes to src/pager.c.

   654    654     sqlite3_backup *pBackup;    /* Pointer to list of ongoing backup processes */
   655    655     PagerSavepoint *aSavepoint; /* Array of active savepoints */
   656    656     int nSavepoint;             /* Number of elements in aSavepoint[] */
   657    657     char dbFileVers[16];        /* Changes whenever database file changes */
   658    658   
   659    659     void *pMap;                 /* Memory mapped prefix of database file */
   660    660     i64 nMap;                   /* Size of mapping at pMap in bytes */ 
          661  +  i64 nMapValid;              /* Bytes at pMap known to be valid */
   661    662     int nMmapOut;               /* Number of mmap pages currently outstanding */
   662    663     PgHdr *pFree;               /* List of free mmap page headers (pDirty) */
   663    664     /*
   664    665     ** End of the routinely-changing class members
   665    666     ***************************************************************************/
   666    667   
   667    668     u16 nExtra;                 /* Add this many bytes to each in-memory page */
................................................................................
  2508   2509       assert( pPager->eLock==EXCLUSIVE_LOCK );
  2509   2510       /* TODO: Is it safe to use Pager.dbFileSize here? */
  2510   2511       rc = sqlite3OsFileSize(pPager->fd, &currentSize);
  2511   2512       newSize = szPage*(i64)nPage;
  2512   2513       if( rc==SQLITE_OK && currentSize!=newSize ){
  2513   2514         if( currentSize>newSize ){
  2514   2515           rc = sqlite3OsTruncate(pPager->fd, newSize);
         2516  +        if( newSize<pPager->nMapValid ){
         2517  +          pPager->nMapValid = newSize;
         2518  +        }
  2515   2519         }else if( (currentSize+szPage)<=newSize ){
  2516   2520           char *pTmp = pPager->pTmpSpace;
  2517   2521           memset(pTmp, 0, szPage);
  2518   2522           testcase( (newSize-szPage) == currentSize );
  2519   2523           testcase( (newSize-szPage) >  currentSize );
  2520   2524           rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage);
  2521   2525         }
................................................................................
  3814   3818   ** Unmap any mapping of the database file.
  3815   3819   */
  3816   3820   static int pagerUnmap(Pager *pPager){
  3817   3821     if( pPager->pMap ){
  3818   3822       munmap(pPager->pMap, pPager->nMap);
  3819   3823       pPager->pMap = 0;
  3820   3824       pPager->nMap = 0;
         3825  +    pPager->nMapValid = 0;
  3821   3826     }
  3822   3827     return SQLITE_OK;
  3823   3828   }
  3824   3829   
  3825   3830   static int pagerMap(Pager *pPager){
  3826   3831     int rc;
  3827   3832     i64 sz = 0;
................................................................................
  3835   3840       if( rc==SQLITE_OK ){
  3836   3841         void *pMap = mmap(0, sz, PROT_READ, MAP_SHARED, fd, 0);
  3837   3842         if( pMap==MAP_FAILED ){
  3838   3843         assert( 0 );
  3839   3844           return SQLITE_IOERR;
  3840   3845         }
  3841   3846         pPager->pMap = pMap;
  3842         -      pPager->nMap = sz;
         3847  +      pPager->nMapValid = pPager->nMap = sz;
  3843   3848       }
  3844   3849     }
  3845   3850   
  3846   3851     return rc;
  3847   3852   }
  3848   3853   
  3849   3854   static int pagerAcquireMapPage(Pager *pPager, Pgno pgno, PgHdr **ppPage){
................................................................................
  3854   3859   
  3855   3860     if( MEMDB==0 && pPager->tempFile==0 ){
  3856   3861       if( pPager->pMap==0 ){
  3857   3862         rc = pagerMap(pPager);
  3858   3863         if( rc!=SQLITE_OK ) return rc;
  3859   3864       }
  3860   3865   
  3861         -    if( pgno!=1 && pPager->pMap && pPager->nMap>=((i64)pgno*pPager->pageSize) ){
         3866  +    if( pgno!=1 && pPager->pMap 
         3867  +     && pPager->nMapValid>=((i64)pgno*pPager->pageSize) 
         3868  +    ){
  3862   3869         PgHdr *p;
  3863   3870         if( pPager->pFree ){
  3864   3871           p = pPager->pFree;
  3865   3872           pPager->pFree = p->pDirty;
  3866   3873           p->pDirty = 0;
  3867   3874           memset(p->pExtra, 0, pPager->nExtra);
  3868   3875         }else{
................................................................................
  5031   5038   
  5032   5039         assert( pPager->eState==PAGER_OPEN );
  5033   5040         assert( (pPager->eLock==SHARED_LOCK)
  5034   5041              || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK)
  5035   5042         );
  5036   5043       }
  5037   5044   
  5038         -    if( !pPager->tempFile 
  5039         -     && (pPager->pBackup || sqlite3PcachePagecount(pPager->pPCache)>0) 
  5040         -    ){
         5045  +    if( !pPager->tempFile && (
         5046  +        pPager->pBackup 
         5047  +     || sqlite3PcachePagecount(pPager->pPCache)>0 
         5048  +     || pPager->pMap
         5049  +    )){
  5041   5050         /* The shared-lock has just been acquired on the database file
  5042   5051         ** and there are already pages in the cache (from a previous
  5043   5052         ** read or write transaction).  Check to see if the database
  5044   5053         ** has been modified.  If the database has changed, flush the
  5045   5054         ** cache.
  5046   5055         **
  5047   5056         ** Database changes is detected by looking at 15 bytes beginning
................................................................................
  5068   5077           }
  5069   5078         }else{
  5070   5079           memset(dbFileVers, 0, sizeof(dbFileVers));
  5071   5080         }
  5072   5081   
  5073   5082         if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
  5074   5083           pager_reset(pPager);
         5084  +
         5085  +        /* Unmap the database file. It is possible that external processes
         5086  +        ** may have truncated the database file and then extended it back
         5087  +        ** to its original size while this process was not holding a lock.
         5088  +        ** In this case there may exist a Pager.pMap mapping that appears
         5089  +        ** to be the right size but is not actually valid. Avoid this
         5090  +        ** possibility by unmapping the db here. */
         5091  +        pagerUnmap(pPager);
         5092  +      }else if( ((i64)nPage*pPager->pageSize)!=pPager->nMap ){
  5075   5093           pagerUnmap(pPager);
  5076   5094         }
  5077   5095       }
  5078   5096   
  5079   5097       /* If there is a WAL file in the file-system, open this database in WAL
  5080   5098       ** mode. Otherwise, the following function call is a no-op.
  5081   5099       */
................................................................................
  5169   5187   ** Since Lookup() never goes to disk, it never has to deal with locks
  5170   5188   ** or journal files.
  5171   5189   */
  5172   5190   int sqlite3PagerAcquire(
  5173   5191     Pager *pPager,      /* The pager open on the database file */
  5174   5192     Pgno pgno,          /* Page number to fetch */
  5175   5193     DbPage **ppPage,    /* Write a pointer to the page here */
  5176         -  int noContent       /* Do not bother reading content from disk if true */
         5194  +  int flags           /* PAGER_ACQUIRE_XXX flags */
  5177   5195   ){
  5178         -  int rc;
  5179         -  PgHdr *pPg;
         5196  +  int rc = SQLITE_OK;
         5197  +  PgHdr *pPg = 0;
         5198  +  const int noContent = (flags & PAGER_ACQUIRE_NOCONTENT);
         5199  +
         5200  +  /* It is acceptable to use a read-only (mmap) page for any page except
         5201  +  ** page 1, provided that pPager->pWal is 0, that the db is not a
         5202  +  ** temporary or in-memory database, and that either no write-transaction
         5203  +  ** is open or the ACQUIRE_READONLY flag was specified by the caller.  */
         5204  +  const int bMmapOk = (
         5205  +      (pgno!=1 && pPager->pWal==0 && !pPager->tempFile && !MEMDB)
         5206  +   && (pPager->eState==PAGER_READER || (flags & PAGER_ACQUIRE_READONLY))
         5207  +  );
  5180   5208   
  5181   5209     assert( pPager->eState>=PAGER_READER );
  5182   5210     assert( assert_pager_state(pPager) );
  5183   5211   
  5184   5212     if( pgno==0 ){
  5185   5213       return SQLITE_CORRUPT_BKPT;
  5186   5214     }
  5187   5215   
  5188   5216     /* If the pager is in the error state, return an error immediately. 
  5189   5217     ** Otherwise, request the page from the PCache layer. */
  5190   5218     if( pPager->errCode!=SQLITE_OK ){
  5191   5219       rc = pPager->errCode;
  5192   5220     }else{
  5193         -    if( pPager->eState==PAGER_READER && pPager->pWal==0 ){
  5194         -      rc = pagerAcquireMapPage(pPager, pgno, &pPg);
  5195         -      if( rc!=SQLITE_OK ) goto pager_acquire_err;
  5196         -      if( pPg ){
  5197         -        *ppPage = pPg;
  5198         -        return SQLITE_OK;
         5221  +
         5222  +    if( bMmapOk ){
         5223  +      if( pPager->pMap==0 ) rc = pagerMap(pPager);
         5224  +      if( rc==SQLITE_OK && pPager->nMap>=((i64)pgno * pPager->pageSize) ){
         5225  +        if( pPager->eState>PAGER_READER ){
         5226  +          (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
         5227  +        }
         5228  +        if( pPg==0 ){
         5229  +          rc = pagerAcquireMapPage(pPager, pgno, &pPg);
         5230  +        }
         5231  +        if( pPg ){
         5232  +          assert( rc==SQLITE_OK );
         5233  +          *ppPage = pPg;
         5234  +          return SQLITE_OK;
         5235  +        }else if( rc!=SQLITE_OK ){
         5236  +          goto pager_acquire_err;
         5237  +        }
  5199   5238         }
  5200   5239       }
  5201   5240   
  5202   5241       rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage);
  5203   5242     }
  5204   5243   
  5205   5244     if( rc!=SQLITE_OK ){
................................................................................
  5429   5468   int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){
  5430   5469     int rc = SQLITE_OK;
  5431   5470   
  5432   5471     if( pPager->errCode ) return pPager->errCode;
  5433   5472     assert( pPager->eState>=PAGER_READER && pPager->eState<PAGER_ERROR );
  5434   5473     pPager->subjInMemory = (u8)subjInMemory;
  5435   5474   
  5436         -  pagerUnmap(pPager);
  5437         -
  5438   5475     if( ALWAYS(pPager->eState==PAGER_READER) ){
  5439   5476       assert( pPager->pInJournal==0 );
  5440   5477   
  5441   5478       if( pagerUseWal(pPager) ){
  5442   5479         /* If the pager is configured to use locking_mode=exclusive, and an
  5443   5480         ** exclusive lock on the database is not already held, obtain it now.
  5444   5481         */
................................................................................
  5649   5686   int sqlite3PagerWrite(DbPage *pDbPage){
  5650   5687     int rc = SQLITE_OK;
  5651   5688   
  5652   5689     PgHdr *pPg = pDbPage;
  5653   5690     Pager *pPager = pPg->pPager;
  5654   5691     Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
  5655   5692   
         5693  +  assert( (pPg->flags & PGHDR_MMAP)==0 );
  5656   5694     assert( pPager->eState>=PAGER_WRITER_LOCKED );
  5657   5695     assert( pPager->eState!=PAGER_ERROR );
  5658   5696     assert( assert_pager_state(pPager) );
  5659   5697   
  5660         -  /* There must not be any outstanding mmap pages at this point */
  5661         -  assert( pPager->nMmapOut==0 );
  5662         -
  5663   5698     if( nPagePerSector>1 ){
  5664   5699       Pgno nPageCount;          /* Total number of pages in database file */
  5665   5700       Pgno pg1;                 /* First page of the sector pPg is located on. */
  5666   5701       int nPage = 0;            /* Number of pages starting at pg1 to journal */
  5667   5702       int ii;                   /* Loop counter */
  5668   5703       int needSync = 0;         /* True if any page has PGHDR_NEED_SYNC */
  5669   5704   

Changes to src/pager.h.

    74     74   #define PAGER_JOURNALMODE_DELETE      0   /* Commit by deleting journal file */
    75     75   #define PAGER_JOURNALMODE_PERSIST     1   /* Commit by zeroing journal header */
    76     76   #define PAGER_JOURNALMODE_OFF         2   /* Journal omitted.  */
    77     77   #define PAGER_JOURNALMODE_TRUNCATE    3   /* Commit by truncating journal */
    78     78   #define PAGER_JOURNALMODE_MEMORY      4   /* In-memory journal file */
    79     79   #define PAGER_JOURNALMODE_WAL         5   /* Use write-ahead logging */
    80     80   
           81  +/*
           82  +** Flags that make up the mask passed to sqlite3PagerAcquire().
           83  +*/
           84  +#define PAGER_ACQUIRE_NOCONTENT     0x01  /* Do not load data from disk */
           85  +#define PAGER_ACQUIRE_READONLY      0x02  /* Read-only page is acceptable */
           86  +
    81     87   /*
    82     88   ** The remainder of this file contains the declarations of the functions
    83     89   ** that make up the Pager sub-system API. See source code comments for 
    84     90   ** a detailed description of each routine.
    85     91   */
    86     92   
    87     93   /* Open and close a Pager connection. */