SQLite

Check-in [b387e2f9d2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Allow read-only cursors to use mmap pages even if there is an open write transaction.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | experimental-mmap
Files: files | file ages | folders
SHA1: b387e2f9d24dccac1fd040e309f6fc7ec1cfffba
User & Date: dan 2013-03-15 18:29:18.146
Context
2013-03-15
19:13
Fix a dropped error code in pager.c. (check-in: 022fdc986b user: dan tags: experimental-mmap)
18:29
Allow read-only cursors to use mmap pages even if there is an open write transaction. (check-in: b387e2f9d2 user: dan tags: experimental-mmap)
2013-03-14
18:34
Use mmap() to read from the database file in rollback mode. This branch is unix only for now. (check-in: 6f21d9cbf5 user: dan tags: experimental-mmap)
Changes
Side-by-Side Diff Ignore Whitespace Patch
Changes to src/btree.c.
1565
1566
1567
1568
1569
1570
1571
1572


1573
1574
1575


1576

1577
1578

1579
1580
1581
1582
1583
1584
1585
1565
1566
1567
1568
1569
1570
1571

1572
1573
1574
1575
1576
1577
1578
1579
1580
1581

1582
1583
1584
1585
1586
1587
1588
1589







-
+
+



+
+

+

-
+







** means we have started to be concerned about content and the disk
** read should occur at that point.
*/
static int btreeGetPage(
  BtShared *pBt,       /* The btree */
  Pgno pgno,           /* Number of the page to fetch */
  MemPage **ppPage,    /* Return the page in this parameter */
  int noContent        /* Do not load page content if true */
  int noContent,       /* Do not load page content if true */
  int bReadonly        /* True if a read-only (mmap) page is ok */
){
  int rc;
  DbPage *pDbPage;
  int flags = (noContent ? PAGER_ACQUIRE_NOCONTENT : 0) 
            | (bReadonly ? PAGER_ACQUIRE_READONLY : 0);

  assert( noContent==0 || bReadonly==0 );
  assert( sqlite3_mutex_held(pBt->mutex) );
  rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent);
  rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags);
  if( rc ) return rc;
  *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt);
  return SQLITE_OK;
}

/*
** Retrieve a page from the pager cache. If the requested page is not
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623




1624
1625
1626
1627
1628
1629
1630
1631

1632
1633
1634
1635
1636
1637
1638
1618
1619
1620
1621
1622
1623
1624



1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635

1636
1637
1638
1639
1640
1641
1642
1643







-
-
-
+
+
+
+







-
+







** convenience wrapper around separate calls to btreeGetPage() and 
** btreeInitPage().
**
** If an error occurs, then the value *ppPage is set to is undefined. It
** may remain unchanged, or it may be set to an invalid value.
*/
static int getAndInitPage(
  BtShared *pBt,          /* The database file */
  Pgno pgno,           /* Number of the page to get */
  MemPage **ppPage     /* Write the page pointer here */
  BtShared *pBt,                  /* The database file */
  Pgno pgno,                      /* Number of the page to get */
  MemPage **ppPage,               /* Write the page pointer here */
  int bReadonly                   /* True if a read-only (mmap) page is ok */
){
  int rc;
  assert( sqlite3_mutex_held(pBt->mutex) );

  if( pgno>btreePagecount(pBt) ){
    rc = SQLITE_CORRUPT_BKPT;
  }else{
    rc = btreeGetPage(pBt, pgno, ppPage, 0);
    rc = btreeGetPage(pBt, pgno, ppPage, 0, bReadonly);
    if( rc==SQLITE_OK ){
      rc = btreeInitPage(*ppPage);
      if( rc!=SQLITE_OK ){
        releasePage(*ppPage);
      }
    }
  }
2346
2347
2348
2349
2350
2351
2352
2353

2354
2355
2356
2357
2358
2359
2360
2351
2352
2353
2354
2355
2356
2357

2358
2359
2360
2361
2362
2363
2364
2365







-
+







  int nPageFile = 0;   /* Number of pages in the database file */
  int nPageHeader;     /* Number of pages in the database according to hdr */

  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( pBt->pPage1==0 );
  rc = sqlite3PagerSharedLock(pBt->pPager);
  if( rc!=SQLITE_OK ) return rc;
  rc = btreeGetPage(pBt, 1, &pPage1, 0);
  rc = btreeGetPage(pBt, 1, &pPage1, 0, 0);
  if( rc!=SQLITE_OK ) return rc;

  /* Do some checking to help insure the file we opened really is
  ** a valid database file. 
  */
  nPage = nPageHeader = get4byte(28+(u8*)pPage1->aData);
  sqlite3PagerPagecount(pBt->pPager, &nPageFile);
2561
2562
2563
2564
2565
2566
2567
2568
2569


2570
2571
2572

2573
2574



2575
2576
2577


2578
2579
2580
2581
2582


2583
2584
2585

2586
2587
2588
2589
2590
2591
2592

2593
2594
2595
2596
2597
2598
2599
2566
2567
2568
2569
2570
2571
2572


2573
2574

2575
2576
2577


2578
2579
2580



2581
2582
2583

2584


2585
2586
2587
2588

2589
2590
2591
2592
2593
2594
2595

2596
2597
2598
2599
2600
2601
2602
2603







-
-
+
+
-


+
-
-
+
+
+
-
-
-
+
+

-

-
-
+
+


-
+






-
+







  p->pBt->nPage = 0;
  rc = newDatabase(p->pBt);
  sqlite3BtreeLeave(p);
  return rc;
}

/*
** If the shared-btree passed as the only argument is holding references
** to mmap pages, replace them with read/write pages. Return SQLITE_OK
** Ensure that any root page references held by open cursors are not
** mmap pages.
** if successful, or an error code otherwise.
*/
static int btreeSwapOutMmap(BtShared *pBt){
  int rc = SQLITE_OK;             /* Return code */
  BtCursor *pCsr;
  for(pCsr=pBt->pCursor; pCsr; pCsr=pCsr->pNext){
  BtCursor *pCsr;                 /* Used to iterate through all open cursors */

  for(pCsr=pBt->pCursor; pCsr && rc==SQLITE_OK; pCsr=pCsr->pNext){
    int i;
    for(i=0; i<=pCsr->iPage; i++){
      MemPage *pPg = pCsr->apPage[i];
    if( pCsr->iPage>=0 ){
      MemPage *pPg = pCsr->apPage[0];
      if( pPg->pDbPage->flags & PGHDR_MMAP ){
        int rc;
        MemPage *pNew = 0;
        rc = btreeGetPage(pBt, pPg->pgno, &pNew, 0);
        if( rc==SQLITE_OK && i==pCsr->iPage ){
        rc = btreeGetPage(pBt, pPg->pgno, &pNew, 0, 0);
        if( rc==SQLITE_OK && pCsr->iPage==0 ){
          pCsr->info.pCell = pNew->aData + (pCsr->info.pCell - pPg->aData);
        }
        pCsr->apPage[i] = pNew;
        pCsr->apPage[0] = pNew;
        releasePage(pPg);
        if( rc!=SQLITE_OK ) return rc;
      }
    }
  }

  return SQLITE_OK;
  return rc;
}

/*
** Attempt to start a new transaction. A write-transaction
** is started if the second argument is nonzero, otherwise a read-
** transaction.  If the second argument is 2 or more and exclusive
** transaction is started, meaning that no other process is allowed
2936
2937
2938
2939
2940
2941
2942
2943

2944
2945
2946
2947
2948
2949
2950
2940
2941
2942
2943
2944
2945
2946

2947
2948
2949
2950
2951
2952
2953
2954







-
+







  }

  /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
  ** that it points at iFreePage. Also fix the pointer map entry for
  ** iPtrPage.
  */
  if( eType!=PTRMAP_ROOTPAGE ){
    rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
    rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    rc = sqlite3PagerWrite(pPtrPage->pDbPage);
    if( rc!=SQLITE_OK ){
      releasePage(pPtrPage);
      return rc;
3020
3021
3022
3023
3024
3025
3026
3027

3028
3029
3030
3031
3032
3033
3034
3024
3025
3026
3027
3028
3029
3030

3031
3032
3033
3034
3035
3036
3037
3038







-
+







      }
    } else {
      Pgno iFreePg;             /* Index of free page to move pLastPg to */
      MemPage *pLastPg;
      u8 eMode = BTALLOC_ANY;   /* Mode parameter for allocateBtreePage() */
      Pgno iNear = 0;           /* nearby parameter for allocateBtreePage() */

      rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0);
      rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0, 0);
      if( rc!=SQLITE_OK ){
        return rc;
      }

      /* If bCommit is zero, this loop runs exactly once and page pLastPg
      ** is swapped with the first free page pulled off the free list.
      **
3112
3113
3114
3115
3116
3117
3118


3119
3120



3121
3122
3123
3124
3125
3126
3127
3116
3117
3118
3119
3120
3121
3122
3123
3124


3125
3126
3127
3128
3129
3130
3131
3132
3133
3134







+
+
-
-
+
+
+







    Pgno nOrig = btreePagecount(pBt);
    Pgno nFree = get4byte(&pBt->pPage1->aData[36]);
    Pgno nFin = finalDbSize(pBt, nOrig, nFree);

    if( nOrig<nFin ){
      rc = SQLITE_CORRUPT_BKPT;
    }else if( nFree>0 ){
      rc = saveAllCursors(pBt, 0, 0);
      if( rc==SQLITE_OK ){
      invalidateAllOverflowCache(pBt);
      rc = incrVacuumStep(pBt, nFin, nOrig, 0);
        invalidateAllOverflowCache(pBt);
        rc = incrVacuumStep(pBt, nFin, nOrig, 0);
      }
      if( rc==SQLITE_OK ){
        rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
        put4byte(&pBt->pPage1->aData[28], pBt->nPage);
      }
    }else{
      rc = SQLITE_DONE;
    }
3434
3435
3436
3437
3438
3439
3440
3441

3442
3443
3444
3445
3446
3447
3448
3441
3442
3443
3444
3445
3446
3447

3448
3449
3450
3451
3452
3453
3454
3455







-
+







    if( rc2!=SQLITE_OK ){
      rc = rc2;
    }

    /* The rollback may have destroyed the pPage1->aData value.  So
    ** call btreeGetPage() on page 1 again to make
    ** sure pPage1->aData is set correctly. */
    if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
    if( btreeGetPage(pBt, 1, &pPage1, 0, 0)==SQLITE_OK ){
      int nPage = get4byte(28+(u8*)pPage1->aData);
      testcase( nPage==0 );
      if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage);
      testcase( pBt->nPage!=nPage );
      pBt->nPage = nPage;
      releasePage(pPage1);
    }
3868
3869
3870
3871
3872
3873
3874
3875

3876
3877
3878
3879
3880
3881
3882
3875
3876
3877
3878
3879
3880
3881

3882
3883
3884
3885
3886
3887
3888
3889







-
+







      }
    }
  }
#endif

  assert( next==0 || rc==SQLITE_DONE );
  if( rc==SQLITE_OK ){
    rc = btreeGetPage(pBt, ovfl, &pPage, 0);
    rc = btreeGetPage(pBt, ovfl, &pPage, 0, (ppPage==0));
    assert( rc==SQLITE_OK || pPage==0 );
    if( rc==SQLITE_OK ){
      next = get4byte(pPage->aData);
    }
  }

  *pPgnoNext = next;
4089
4090
4091
4092
4093
4094
4095
4096



4097
4098
4099
4100
4101
4102
4103
4096
4097
4098
4099
4100
4101
4102

4103
4104
4105
4106
4107
4108
4109
4110
4111
4112







-
+
+
+







          nextPage = get4byte(aWrite);
          memcpy(aWrite, aSave, 4);
        }else
#endif

        {
          DbPage *pDbPage;
          rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage);
          rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage,
              (eOp==0 ? PAGER_ACQUIRE_READONLY : 0)
          );
          if( rc==SQLITE_OK ){
            aPayload = sqlite3PagerGetData(pDbPage);
            nextPage = get4byte(aPayload);
            rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
            sqlite3PagerUnref(pDbPage);
            offset = 0;
          }
4268
4269
4270
4271
4272
4273
4274

4275
4276
4277
4278

4279
4280
4281
4282
4283
4284
4285
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287

4288
4289
4290
4291
4292
4293
4294
4295







+



-
+







  int i = pCur->iPage;
  MemPage *pNewPage;
  BtShared *pBt = pCur->pBt;

  assert( cursorHoldsMutex(pCur) );
  assert( pCur->eState==CURSOR_VALID );
  assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
  assert( pCur->iPage>=0 );
  if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){
    return SQLITE_CORRUPT_BKPT;
  }
  rc = getAndInitPage(pBt, newPgno, &pNewPage);
  rc = getAndInitPage(pBt, newPgno, &pNewPage, (pCur->wrFlag==0));
  if( rc ) return rc;
  pCur->apPage[i+1] = pNewPage;
  pCur->aiIdx[i+1] = 0;
  pCur->iPage++;

  pCur->info.nSize = 0;
  pCur->validNKey = 0;
4388
4389
4390
4391
4392
4393
4394
4395

4396
4397
4398
4399
4400
4401
4402
4398
4399
4400
4401
4402
4403
4404

4405
4406
4407
4408
4409
4410
4411
4412







-
+







      releasePage(pCur->apPage[i]);
    }
    pCur->iPage = 0;
  }else if( pCur->pgnoRoot==0 ){
    pCur->eState = CURSOR_INVALID;
    return SQLITE_OK;
  }else{
    rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]);
    rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0], 0);
    if( rc!=SQLITE_OK ){
      pCur->eState = CURSOR_INVALID;
      return rc;
    }
    pCur->iPage = 0;

    /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor
5002
5003
5004
5005
5006
5007
5008
5009

5010
5011
5012
5013
5014
5015
5016
5012
5013
5014
5015
5016
5017
5018

5019
5020
5021
5022
5023
5024
5025
5026







-
+







      }else{
        iTrunk = get4byte(&pPage1->aData[32]);
      }
      testcase( iTrunk==mxPage );
      if( iTrunk>mxPage ){
        rc = SQLITE_CORRUPT_BKPT;
      }else{
        rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0);
        rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0);
      }
      if( rc ){
        pTrunk = 0;
        goto end_allocate_page;
      }
      assert( pTrunk!=0 );
      assert( pTrunk->aData!=0 );
5066
5067
5068
5069
5070
5071
5072
5073

5074
5075
5076
5077
5078
5079
5080
5076
5077
5078
5079
5080
5081
5082

5083
5084
5085
5086
5087
5088
5089
5090







-
+







          MemPage *pNewTrunk;
          Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
          if( iNewTrunk>mxPage ){ 
            rc = SQLITE_CORRUPT_BKPT;
            goto end_allocate_page;
          }
          testcase( iNewTrunk==mxPage );
          rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0);
          rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0, 0);
          if( rc!=SQLITE_OK ){
            goto end_allocate_page;
          }
          rc = sqlite3PagerWrite(pNewTrunk->pDbPage);
          if( rc!=SQLITE_OK ){
            releasePage(pNewTrunk);
            goto end_allocate_page;
5146
5147
5148
5149
5150
5151
5152
5153

5154
5155
5156
5157
5158
5159
5160
5156
5157
5158
5159
5160
5161
5162

5163
5164
5165
5166
5167
5168
5169
5170







-
+







          rc = sqlite3PagerWrite(pTrunk->pDbPage);
          if( rc ) goto end_allocate_page;
          if( closest<k-1 ){
            memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
          }
          put4byte(&aData[4], k-1);
          noContent = !btreeGetHasContent(pBt, *pPgno);
          rc = btreeGetPage(pBt, *pPgno, ppPage, noContent);
          rc = btreeGetPage(pBt, *pPgno, ppPage, noContent, 0);
          if( rc==SQLITE_OK ){
            rc = sqlite3PagerWrite((*ppPage)->pDbPage);
            if( rc!=SQLITE_OK ){
              releasePage(*ppPage);
            }
          }
          searchList = 0;
5194
5195
5196
5197
5198
5199
5200
5201

5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215

5216
5217
5218
5219
5220
5221
5222
5204
5205
5206
5207
5208
5209
5210

5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224

5225
5226
5227
5228
5229
5230
5231
5232







-
+













-
+







      /* If *pPgno refers to a pointer-map page, allocate two new pages
      ** at the end of the file instead of one. The first allocated page
      ** becomes a new pointer-map page, the second is used by the caller.
      */
      MemPage *pPg = 0;
      TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage));
      assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) );
      rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent);
      rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent, 0);
      if( rc==SQLITE_OK ){
        rc = sqlite3PagerWrite(pPg->pDbPage);
        releasePage(pPg);
      }
      if( rc ) return rc;
      pBt->nPage++;
      if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; }
    }
#endif
    put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage);
    *pPgno = pBt->nPage;

    assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
    rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent);
    rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent, 0);
    if( rc ) return rc;
    rc = sqlite3PagerWrite((*ppPage)->pDbPage);
    if( rc!=SQLITE_OK ){
      releasePage(*ppPage);
    }
    TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
  }
5276
5277
5278
5279
5280
5281
5282
5283

5284
5285
5286
5287
5288
5289
5290
5286
5287
5288
5289
5290
5291
5292

5293
5294
5295
5296
5297
5298
5299
5300







-
+







  nFree = get4byte(&pPage1->aData[36]);
  put4byte(&pPage1->aData[36], nFree+1);

  if( pBt->btsFlags & BTS_SECURE_DELETE ){
    /* If the secure_delete option is enabled, then
    ** always fully overwrite deleted information with zeros.
    */
    if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) )
    if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0, 0))!=0) )
     ||            ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0)
    ){
      goto freepage_out;
    }
    memset(pPage->aData, 0, pPage->pBt->pageSize);
  }

5303
5304
5305
5306
5307
5308
5309
5310

5311
5312
5313
5314
5315
5316
5317
5313
5314
5315
5316
5317
5318
5319

5320
5321
5322
5323
5324
5325
5326
5327







-
+







  ** first trunk page in the current free-list. This block tests if it
  ** is possible to add the page as a new free-list leaf.
  */
  if( nFree!=0 ){
    u32 nLeaf;                /* Initial number of leaf cells on trunk page */

    iTrunk = get4byte(&pPage1->aData[32]);
    rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0);
    rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0);
    if( rc!=SQLITE_OK ){
      goto freepage_out;
    }

    nLeaf = get4byte(&pTrunk->aData[4]);
    assert( pBt->usableSize>32 );
    if( nLeaf > (u32)pBt->usableSize/4 - 2 ){
5349
5350
5351
5352
5353
5354
5355
5356

5357
5358
5359
5360
5361
5362
5363
5359
5360
5361
5362
5363
5364
5365

5366
5367
5368
5369
5370
5371
5372
5373







-
+








  /* If control flows to this point, then it was not possible to add the
  ** the page being freed as a leaf page of the first trunk in the free-list.
  ** Possibly because the free-list is empty, or possibly because the 
  ** first trunk in the free-list is full. Either way, the page being freed
  ** will become the new first trunk page in the free-list.
  */
  if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){
  if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0, 0)) ){
    goto freepage_out;
  }
  rc = sqlite3PagerWrite(pPage->pDbPage);
  if( rc!=SQLITE_OK ){
    goto freepage_out;
  }
  put4byte(pPage->aData, iTrunk);
6150
6151
6152
6153
6154
6155
6156
6157

6158
6159
6160
6161
6162
6163
6164
6160
6161
6162
6163
6164
6165
6166

6167
6168
6169
6170
6171
6172
6173
6174







-
+







  if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){
    pRight = &pParent->aData[pParent->hdrOffset+8];
  }else{
    pRight = findCell(pParent, i+nxDiv-pParent->nOverflow);
  }
  pgno = get4byte(pRight);
  while( 1 ){
    rc = getAndInitPage(pBt, pgno, &apOld[i]);
    rc = getAndInitPage(pBt, pgno, &apOld[i], 0);
    if( rc ){
      memset(apOld, 0, (i+1)*sizeof(MemPage*));
      goto balance_cleanup;
    }
    nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
    if( (i--)==0 ) break;

7241
7242
7243
7244
7245
7246
7247
7248

7249
7250
7251
7252
7253
7254
7255
7256
7257
7258
7259
7260
7261
7262
7263
7264
7265
7266
7267
7268
7269

7270
7271
7272
7273
7274
7275
7276
7251
7252
7253
7254
7255
7256
7257

7258
7259
7260
7261
7262
7263
7264
7265
7266
7267
7268
7269
7270
7271
7272
7273
7274
7275
7276
7277
7278

7279
7280
7281
7282
7283
7284
7285
7286







-
+




















-
+







      */
      u8 eType = 0;
      Pgno iPtrPage = 0;

      releasePage(pPageMove);

      /* Move the page currently at pgnoRoot to pgnoMove. */
      rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
      rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
      if( eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
        rc = SQLITE_CORRUPT_BKPT;
      }
      if( rc!=SQLITE_OK ){
        releasePage(pRoot);
        return rc;
      }
      assert( eType!=PTRMAP_ROOTPAGE );
      assert( eType!=PTRMAP_FREEPAGE );
      rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove, 0);
      releasePage(pRoot);

      /* Obtain the page at pgnoRoot */
      if( rc!=SQLITE_OK ){
        return rc;
      }
      rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
      rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      rc = sqlite3PagerWrite(pRoot->pDbPage);
      if( rc!=SQLITE_OK ){
        releasePage(pRoot);
        return rc;
7338
7339
7340
7341
7342
7343
7344
7345

7346
7347
7348
7349
7350
7351
7352
7348
7349
7350
7351
7352
7353
7354

7355
7356
7357
7358
7359
7360
7361
7362







-
+







  int i;

  assert( sqlite3_mutex_held(pBt->mutex) );
  if( pgno>btreePagecount(pBt) ){
    return SQLITE_CORRUPT_BKPT;
  }

  rc = getAndInitPage(pBt, pgno, &pPage);
  rc = getAndInitPage(pBt, pgno, &pPage, 0);
  if( rc ) return rc;
  for(i=0; i<pPage->nCell; i++){
    pCell = findCell(pPage, i);
    if( !pPage->leaf ){
      rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange);
      if( rc ) goto cleardatabasepage_out;
    }
7440
7441
7442
7443
7444
7445
7446
7447

7448
7449
7450
7451
7452
7453
7454
7450
7451
7452
7453
7454
7455
7456

7457
7458
7459
7460
7461
7462
7463
7464







-
+







  ** This error is caught long before control reaches this point.
  */
  if( NEVER(pBt->pCursor) ){
    sqlite3ConnectionBlocked(p->db, pBt->pCursor->pBtree->db);
    return SQLITE_LOCKED_SHAREDCACHE;
  }

  rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
  rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0, 0);
  if( rc ) return rc;
  rc = sqlite3BtreeClearTable(p, iTable, 0);
  if( rc ){
    releasePage(pPage);
    return rc;
  }

7475
7476
7477
7478
7479
7480
7481
7482

7483
7484
7485
7486
7487
7488
7489
7490
7491
7492

7493
7494
7495
7496
7497
7498
7499
7485
7486
7487
7488
7489
7490
7491

7492
7493
7494
7495
7496
7497
7498
7499
7500
7501

7502
7503
7504
7505
7506
7507
7508
7509







-
+









-
+







      }else{
        /* The table being dropped does not have the largest root-page
        ** number in the database. So move the page that does into the 
        ** gap left by the deleted root-page.
        */
        MemPage *pMove;
        releasePage(pPage);
        rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
        rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0);
        releasePage(pMove);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        pMove = 0;
        rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
        rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0);
        freePage(pMove, &rc);
        releasePage(pMove);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        *piMoved = maxRootPgno;
      }
7897
7898
7899
7900
7901
7902
7903
7904

7905
7906
7907
7908
7909
7910
7911
7907
7908
7909
7910
7911
7912
7913

7914
7915
7916
7917
7918
7919
7920
7921







-
+








  /* Check that the page exists
  */
  pBt = pCheck->pBt;
  usableSize = pBt->usableSize;
  if( iPage==0 ) return 0;
  if( checkRef(pCheck, iPage, zParentContext) ) return 0;
  if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
  if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0, 0))!=0 ){
    checkAppendMsg(pCheck, zContext,
       "unable to get the page. error code=%d", rc);
    return 0;
  }

  /* Clear MemPage.isInit to make sure the corruption detection code in
  ** btreeInitPage() is executed.  */
Changes to src/pager.c.
654
655
656
657
658
659
660

661
662
663
664
665
666
667
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668







+







  sqlite3_backup *pBackup;    /* Pointer to list of ongoing backup processes */
  PagerSavepoint *aSavepoint; /* Array of active savepoints */
  int nSavepoint;             /* Number of elements in aSavepoint[] */
  char dbFileVers[16];        /* Changes whenever database file changes */

  void *pMap;                 /* Memory mapped prefix of database file */
  i64 nMap;                   /* Size of mapping at pMap in bytes */ 
  i64 nMapValid;              /* Bytes at pMap known to be valid */
  int nMmapOut;               /* Number of mmap pages currently outstanding */
  PgHdr *pFree;               /* List of free mmap page headers (pDirty) */
  /*
  ** End of the routinely-changing class members
  ***************************************************************************/

  u16 nExtra;                 /* Add this many bytes to each in-memory page */
2508
2509
2510
2511
2512
2513
2514



2515
2516
2517
2518
2519
2520
2521
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525







+
+
+







    assert( pPager->eLock==EXCLUSIVE_LOCK );
    /* TODO: Is it safe to use Pager.dbFileSize here? */
    rc = sqlite3OsFileSize(pPager->fd, &currentSize);
    newSize = szPage*(i64)nPage;
    if( rc==SQLITE_OK && currentSize!=newSize ){
      if( currentSize>newSize ){
        rc = sqlite3OsTruncate(pPager->fd, newSize);
        if( newSize<pPager->nMapValid ){
          pPager->nMapValid = newSize;
        }
      }else if( (currentSize+szPage)<=newSize ){
        char *pTmp = pPager->pTmpSpace;
        memset(pTmp, 0, szPage);
        testcase( (newSize-szPage) == currentSize );
        testcase( (newSize-szPage) >  currentSize );
        rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage);
      }
3814
3815
3816
3817
3818
3819
3820

3821
3822
3823
3824
3825
3826
3827
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832







+







** Unmap any mapping of the database file.
*/
static int pagerUnmap(Pager *pPager){
  /* Nothing to do unless a mapping is currently recorded in pPager->pMap. */
  if( pPager->pMap ){
    /* Release the mapped region. The return value of munmap() is not
    ** checked here. */
    munmap(pPager->pMap, pPager->nMap);
    /* Clear the mapping pointer and both size fields (total mapped size
    ** and the number of bytes known to be valid). */
    pPager->pMap = 0;
    pPager->nMap = 0;
    pPager->nMapValid = 0;
  }
  /* This routine returns SQLITE_OK unconditionally. */
  return SQLITE_OK;
}

static int pagerMap(Pager *pPager){
  int rc;
  i64 sz = 0;
3835
3836
3837
3838
3839
3840
3841
3842

3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861



3862
3863
3864
3865
3866
3867
3868
3840
3841
3842
3843
3844
3845
3846

3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865

3866
3867
3868
3869
3870
3871
3872
3873
3874
3875







-
+


















-
+
+
+







    if( rc==SQLITE_OK ){
      void *pMap = mmap(0, sz, PROT_READ, MAP_SHARED, fd, 0);
      if( pMap==MAP_FAILED ){
      assert( 0 );
        return SQLITE_IOERR;
      }
      pPager->pMap = pMap;
      pPager->nMap = sz;
      pPager->nMapValid = pPager->nMap = sz;
    }
  }

  return rc;
}

static int pagerAcquireMapPage(Pager *pPager, Pgno pgno, PgHdr **ppPage){
  int rc;
  *ppPage = 0;

  assert( pPager->pWal==0 );

  if( MEMDB==0 && pPager->tempFile==0 ){
    if( pPager->pMap==0 ){
      rc = pagerMap(pPager);
      if( rc!=SQLITE_OK ) return rc;
    }

    if( pgno!=1 && pPager->pMap && pPager->nMap>=((i64)pgno*pPager->pageSize) ){
    if( pgno!=1 && pPager->pMap 
     && pPager->nMapValid>=((i64)pgno*pPager->pageSize) 
    ){
      PgHdr *p;
      if( pPager->pFree ){
        p = pPager->pFree;
        pPager->pFree = p->pDirty;
        p->pDirty = 0;
        memset(p->pExtra, 0, pPager->nExtra);
      }else{
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040





5041
5042
5043
5044
5045
5046
5047
5038
5039
5040
5041
5042
5043
5044



5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056







-
-
-
+
+
+
+
+








      assert( pPager->eState==PAGER_OPEN );
      assert( (pPager->eLock==SHARED_LOCK)
           || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK)
      );
    }

    if( !pPager->tempFile 
     && (pPager->pBackup || sqlite3PcachePagecount(pPager->pPCache)>0) 
    ){
    if( !pPager->tempFile && (
        pPager->pBackup 
     || sqlite3PcachePagecount(pPager->pPCache)>0 
     || pPager->pMap
    )){
      /* The shared-lock has just been acquired on the database file
      ** and there are already pages in the cache (from a previous
      ** read or write transaction).  Check to see if the database
      ** has been modified.  If the database has changed, flush the
      ** cache.
      **
      ** Database changes is detected by looking at 15 bytes beginning
5068
5069
5070
5071
5072
5073
5074









5075
5076
5077
5078
5079
5080
5081
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099







+
+
+
+
+
+
+
+
+







        }
      }else{
        memset(dbFileVers, 0, sizeof(dbFileVers));
      }

      if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
        pager_reset(pPager);

        /* Unmap the database file. It is possible that external processes
        ** may have truncated the database file and then extended it back
        ** to its original size while this process was not holding a lock.
        ** In this case there may exist a Pager.pMap mapping that appears
        ** to be the right size but is not actually valid. Avoid this
        ** possibility by unmapping the db here. */
        pagerUnmap(pPager);
      }else if( ((i64)nPage*pPager->pageSize)!=pPager->nMap ){
        pagerUnmap(pPager);
      }
    }

    /* If there is a WAL file in the file-system, open this database in WAL
    ** mode. Otherwise, the following function call is a no-op.
    */
5169
5170
5171
5172
5173
5174
5175
5176

5177
5178
5179












5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192




5193
5194





5195
5196
5197
5198








5199
5200
5201
5202
5203
5204
5205
5187
5188
5189
5190
5191
5192
5193

5194
5195


5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224


5225
5226
5227
5228
5229




5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244







-
+

-
-
+
+
+
+
+
+
+
+
+
+
+
+













+
+
+
+
-
-
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
+







** Since Lookup() never goes to disk, it never has to deal with locks
** or journal files.
*/
int sqlite3PagerAcquire(
  Pager *pPager,      /* The pager open on the database file */
  Pgno pgno,          /* Page number to fetch */
  DbPage **ppPage,    /* Write a pointer to the page here */
  int noContent       /* Do not bother reading content from disk if true */
  int flags           /* PAGER_ACQUIRE_XXX flags */
){
  int rc;
  PgHdr *pPg;
  int rc = SQLITE_OK;
  PgHdr *pPg = 0;
  const int noContent = (flags & PAGER_ACQUIRE_NOCONTENT);

  /* It is acceptable to use a read-only (mmap) page for any page except
  ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY
  ** flag was specified by the caller. And so long as the db is not a 
  ** temporary or in-memory database.  */
  const int bMmapOk = (
      (pgno!=1 && pPager->pWal==0 && !pPager->tempFile && !MEMDB)
   && (pPager->eState==PAGER_READER || (flags & PAGER_ACQUIRE_READONLY))
  );

  assert( pPager->eState>=PAGER_READER );
  assert( assert_pager_state(pPager) );

  if( pgno==0 ){
    return SQLITE_CORRUPT_BKPT;
  }

  /* If the pager is in the error state, return an error immediately. 
  ** Otherwise, request the page from the PCache layer. */
  if( pPager->errCode!=SQLITE_OK ){
    rc = pPager->errCode;
  }else{

    if( bMmapOk ){
      if( pPager->pMap==0 ) rc = pagerMap(pPager);
      if( rc==SQLITE_OK && pPager->nMap>=((i64)pgno * pPager->pageSize) ){
    if( pPager->eState==PAGER_READER && pPager->pWal==0 ){
      rc = pagerAcquireMapPage(pPager, pgno, &pPg);
        if( pPager->eState>PAGER_READER ){
          (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
        }
        if( pPg==0 ){
          rc = pagerAcquireMapPage(pPager, pgno, &pPg);
      if( rc!=SQLITE_OK ) goto pager_acquire_err;
      if( pPg ){
        *ppPage = pPg;
        return SQLITE_OK;
        }
        if( pPg ){
          assert( rc==SQLITE_OK );
          *ppPage = pPg;
          return SQLITE_OK;
        }else if( rc!=SQLITE_OK ){
          goto pager_acquire_err;
        }
      }
    }

    rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage);
  }

  if( rc!=SQLITE_OK ){
5429
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5468
5469
5470
5471
5472
5473
5474


5475
5476
5477
5478
5479
5480
5481







-
-







int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){
  int rc = SQLITE_OK;

  if( pPager->errCode ) return pPager->errCode;
  assert( pPager->eState>=PAGER_READER && pPager->eState<PAGER_ERROR );
  pPager->subjInMemory = (u8)subjInMemory;

  pagerUnmap(pPager);

  if( ALWAYS(pPager->eState==PAGER_READER) ){
    assert( pPager->pInJournal==0 );

    if( pagerUseWal(pPager) ){
      /* If the pager is configured to use locking_mode=exclusive, and an
      ** exclusive lock on the database is not already held, obtain it now.
      */
5649
5650
5651
5652
5653
5654
5655

5656
5657
5658
5659
5660
5661
5662
5663
5664
5665
5666
5667
5668
5669
5686
5687
5688
5689
5690
5691
5692
5693
5694
5695
5696
5697



5698
5699
5700
5701
5702
5703
5704







+




-
-
-







int sqlite3PagerWrite(DbPage *pDbPage){
  int rc = SQLITE_OK;

  PgHdr *pPg = pDbPage;
  Pager *pPager = pPg->pPager;
  Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);

  assert( (pPg->flags & PGHDR_MMAP)==0 );
  assert( pPager->eState>=PAGER_WRITER_LOCKED );
  assert( pPager->eState!=PAGER_ERROR );
  assert( assert_pager_state(pPager) );

  /* There must not be any outstanding mmap pages at this point */
  assert( pPager->nMmapOut==0 );

  if( nPagePerSector>1 ){
    Pgno nPageCount;          /* Total number of pages in database file */
    Pgno pg1;                 /* First page of the sector pPg is located on. */
    int nPage = 0;            /* Number of pages starting at pg1 to journal */
    int ii;                   /* Loop counter */
    int needSync = 0;         /* True if any page has PGHDR_NEED_SYNC */

Changes to src/pager.h.
74
75
76
77
78
79
80






81
82
83
84
85
86
87
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93







+
+
+
+
+
+







#define PAGER_JOURNALMODE_DELETE      0   /* Commit by deleting journal file */
#define PAGER_JOURNALMODE_PERSIST     1   /* Commit by zeroing journal header */
#define PAGER_JOURNALMODE_OFF         2   /* Journal omitted.  */
#define PAGER_JOURNALMODE_TRUNCATE    3   /* Commit by truncating journal */
#define PAGER_JOURNALMODE_MEMORY      4   /* In-memory journal file */
#define PAGER_JOURNALMODE_WAL         5   /* Use write-ahead logging */

/*
** Flags that make up the mask passed to sqlite3PagerAcquire().
*/
#define PAGER_ACQUIRE_NOCONTENT     0x01  /* Do not load data from disk */
#define PAGER_ACQUIRE_READONLY      0x02  /* Read-only page is acceptable */

/*
** The remainder of this file contains the declarations of the functions
** that make up the Pager sub-system API. See source code comments for 
** a detailed description of each routine.
*/

/* Open and close a Pager connection. */