SQLite

Check-in [2ba5be3119]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Changes to the btree and pager that reduce the amount of I/O when dealing with the freelist. (1) Avoid journaling pages of a table that is being deleted. (2) Do not read the original content of pages being pulled off of the freelist. (CVS 3671)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 2ba5be311945a4c15b6dce7c01efefb513b9a973
User & Date: drh 2007-03-04 13:15:28.000
Context
2007-03-06
11:42
Do not read the last page of a overflow chain when deleting that chain. Just add the page to the freelist. This reduces I/O. (CVS 3672) (check-in: 6db945f7a7 user: drh tags: trunk)
2007-03-04
13:15
Changes to the btree and pager that reduce the amount of I/O when dealing with the freelist. (1) Avoid journaling pages of a table that is being deleted. (2) Do not read the original content of pages being pulled off of the freelist. (CVS 3671) (check-in: 2ba5be3119 user: drh tags: trunk)
2007-03-02
08:12
Handle the case where the estimated cost of a virtual table scan is larger than SQLITE_BIG_DBL. Ticket #2253. (CVS 3670) (check-in: 52885ed8b7 user: danielk1977 tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/btree.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.335 2007/02/10 19:22:36 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.











|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.336 2007/03/04 13:15:28 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
  pPage->isInit = 1;
}

/*
** Get a page from the pager.  Initialize the MemPage.pBt and
** MemPage.aData elements if needed.
*/
static int getPage(BtShared *pBt, Pgno pgno, MemPage **ppPage){
  int rc;
  unsigned char *aData;
  MemPage *pPage;
  rc = sqlite3pager_get(pBt->pPager, pgno, (void**)&aData);
  if( rc ) return rc;
  pPage = (MemPage*)&aData[pBt->pageSize];
  pPage->aData = aData;
  pPage->pBt = pBt;
  pPage->pgno = pgno;
  pPage->hdrOffset = pPage->pgno==1 ? 100 : 0;
  *ppPage = pPage;







|



|







1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
  pPage->isInit = 1;
}

/*
** Get a page from the pager.  Initialize the MemPage.pBt and
** MemPage.aData elements if needed.
*/
static int getPage(BtShared *pBt, Pgno pgno, MemPage **ppPage, int clrFlag){
  int rc;
  unsigned char *aData;
  MemPage *pPage;
  rc = sqlite3pager_acquire(pBt->pPager, pgno, (void**)&aData, clrFlag);
  if( rc ) return rc;
  pPage = (MemPage*)&aData[pBt->pageSize];
  pPage->aData = aData;
  pPage->pBt = pBt;
  pPage->pgno = pgno;
  pPage->hdrOffset = pPage->pgno==1 ? 100 : 0;
  *ppPage = pPage;
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
  MemPage **ppPage,    /* Write the page pointer here */
  MemPage *pParent     /* Parent of the page */
){
  int rc;
  if( pgno==0 ){
    return SQLITE_CORRUPT_BKPT; 
  }
  rc = getPage(pBt, pgno, ppPage);
  if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
    rc = initPage(*ppPage, pParent);
  }
  return rc;
}

/*







|







1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
  MemPage **ppPage,    /* Write the page pointer here */
  MemPage *pParent     /* Parent of the page */
){
  int rc;
  if( pgno==0 ){
    return SQLITE_CORRUPT_BKPT; 
  }
  rc = getPage(pBt, pgno, ppPage, 0);
  if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
    rc = initPage(*ppPage, pParent);
  }
  return rc;
}

/*
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
** is returned if we run out of memory.  SQLITE_PROTOCOL is returned
** if there is a locking protocol violation.
*/
static int lockBtree(BtShared *pBt){
  int rc, pageSize;
  MemPage *pPage1;
  if( pBt->pPage1 ) return SQLITE_OK;
  rc = getPage(pBt, 1, &pPage1);
  if( rc!=SQLITE_OK ) return rc;
  

  /* Do some checking to help insure the file we opened really is
  ** a valid database file. 
  */
  rc = SQLITE_NOTADB;







|







1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
** is returned if we run out of memory.  SQLITE_PROTOCOL is returned
** if there is a locking protocol violation.
*/
static int lockBtree(BtShared *pBt){
  int rc, pageSize;
  MemPage *pPage1;
  if( pBt->pPage1 ) return SQLITE_OK;
  rc = getPage(pBt, 1, &pPage1, 0);
  if( rc!=SQLITE_OK ) return rc;
  

  /* Do some checking to help insure the file we opened really is
  ** a valid database file. 
  */
  rc = SQLITE_NOTADB;
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
  }

  /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
  ** that it points at iFreePage. Also fix the pointer map entry for
  ** iPtrPage.
  */
  if( eType!=PTRMAP_ROOTPAGE ){
    rc = getPage(pBt, iPtrPage, &pPtrPage);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    rc = sqlite3pager_write(pPtrPage->aData);
    if( rc!=SQLITE_OK ){
      releasePage(pPtrPage);
      return rc;







|







2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
  }

  /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
  ** that it points at iFreePage. Also fix the pointer map entry for
  ** iPtrPage.
  */
  if( eType!=PTRMAP_ROOTPAGE ){
    rc = getPage(pBt, iPtrPage, &pPtrPage, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    rc = sqlite3pager_write(pPtrPage->aData);
    if( rc!=SQLITE_OK ){
      releasePage(pPtrPage);
      return rc;
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
      goto autovacuum_out;
    }

    /* If iDbPage is free, do not swap it.  */
    if( eType==PTRMAP_FREEPAGE ){
      continue;
    }
    rc = getPage(pBt, iDbPage, &pDbMemPage);
    if( rc!=SQLITE_OK ) goto autovacuum_out;

    /* Find the next page in the free-list that is not already at the end 
    ** of the file. A page can be pulled off the free list using the 
    ** allocatePage() routine.
    */
    do{







|







2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
      goto autovacuum_out;
    }

    /* If iDbPage is free, do not swap it.  */
    if( eType==PTRMAP_FREEPAGE ){
      continue;
    }
    rc = getPage(pBt, iDbPage, &pDbMemPage, 0);
    if( rc!=SQLITE_OK ) goto autovacuum_out;

    /* Find the next page in the free-list that is not already at the end 
    ** of the file. A page can be pulled off the free list using the 
    ** allocatePage() routine.
    */
    do{
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
    if( rc2!=SQLITE_OK ){
      rc = rc2;
    }

    /* The rollback may have destroyed the pPage1->aData value.  So
    ** call getPage() on page 1 again to make sure pPage1->aData is
    ** set correctly. */
    if( getPage(pBt, 1, &pPage1)==SQLITE_OK ){
      releasePage(pPage1);
    }
    assert( countWriteCursors(pBt)==0 );
    pBt->inTransaction = TRANS_READ;
  }

  if( p->inTrans!=TRANS_NONE ){







|







2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
    if( rc2!=SQLITE_OK ){
      rc = rc2;
    }

    /* The rollback may have destroyed the pPage1->aData value.  So
    ** call getPage() on page 1 again to make sure pPage1->aData is
    ** set correctly. */
    if( getPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
      releasePage(pPage1);
    }
    assert( countWriteCursors(pBt)==0 );
    pBt->inTransaction = TRANS_READ;
  }

  if( p->inTrans!=TRANS_NONE ){
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
    do {
      pPrevTrunk = pTrunk;
      if( pPrevTrunk ){
        iTrunk = get4byte(&pPrevTrunk->aData[0]);
      }else{
        iTrunk = get4byte(&pPage1->aData[32]);
      }
      rc = getPage(pBt, iTrunk, &pTrunk);
      if( rc ){
        pTrunk = 0;
        goto end_allocate_page;
      }

      k = get4byte(&pTrunk->aData[4]);
      if( k==0 && !searchList ){







|







3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
    do {
      pPrevTrunk = pTrunk;
      if( pPrevTrunk ){
        iTrunk = get4byte(&pPrevTrunk->aData[0]);
      }else{
        iTrunk = get4byte(&pPage1->aData[32]);
      }
      rc = getPage(pBt, iTrunk, &pTrunk, 0);
      if( rc ){
        pTrunk = 0;
        goto end_allocate_page;
      }

      k = get4byte(&pTrunk->aData[4]);
      if( k==0 && !searchList ){
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
        }else{
          /* The trunk page is required by the caller but it contains 
          ** pointers to free-list leaves. The first leaf becomes a trunk
          ** page in this case.
          */
          MemPage *pNewTrunk;
          Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
          rc = getPage(pBt, iNewTrunk, &pNewTrunk);
          if( rc!=SQLITE_OK ){
            goto end_allocate_page;
          }
          rc = sqlite3pager_write(pNewTrunk->aData);
          if( rc!=SQLITE_OK ){
            releasePage(pNewTrunk);
            goto end_allocate_page;







|







3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
        }else{
          /* The trunk page is required by the caller but it contains 
          ** pointers to free-list leaves. The first leaf becomes a trunk
          ** page in this case.
          */
          MemPage *pNewTrunk;
          Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
          rc = getPage(pBt, iNewTrunk, &pNewTrunk, 0);
          if( rc!=SQLITE_OK ){
            goto end_allocate_page;
          }
          rc = sqlite3pager_write(pNewTrunk->aData);
          if( rc!=SQLITE_OK ){
            releasePage(pNewTrunk);
            goto end_allocate_page;
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
          TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
                 ": %d more free pages\n",
                 *pPgno, closest+1, k, pTrunk->pgno, n-1));
          if( closest<k-1 ){
            memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
          }
          put4byte(&aData[4], k-1);
          rc = getPage(pBt, *pPgno, ppPage);
          if( rc==SQLITE_OK ){
            sqlite3pager_dont_rollback((*ppPage)->aData);
            rc = sqlite3pager_write((*ppPage)->aData);
            if( rc!=SQLITE_OK ){
              releasePage(*ppPage);
            }
          }







|







3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
          TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
                 ": %d more free pages\n",
                 *pPgno, closest+1, k, pTrunk->pgno, n-1));
          if( closest<k-1 ){
            memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
          }
          put4byte(&aData[4], k-1);
          rc = getPage(pBt, *pPgno, ppPage, 1);
          if( rc==SQLITE_OK ){
            sqlite3pager_dont_rollback((*ppPage)->aData);
            rc = sqlite3pager_write((*ppPage)->aData);
            if( rc!=SQLITE_OK ){
              releasePage(*ppPage);
            }
          }
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
      TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", *pPgno));
      assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
      (*pPgno)++;
    }
#endif

    assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
    rc = getPage(pBt, *pPgno, ppPage);
    if( rc ) return rc;
    rc = sqlite3pager_write((*ppPage)->aData);
    if( rc!=SQLITE_OK ){
      releasePage(*ppPage);
    }
    TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
  }







|







3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
      TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", *pPgno));
      assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
      (*pPgno)++;
    }
#endif

    assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
    rc = getPage(pBt, *pPgno, ppPage, 0);
    if( rc ) return rc;
    rc = sqlite3pager_write((*ppPage)->aData);
    if( rc!=SQLITE_OK ){
      releasePage(*ppPage);
    }
    TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
  }
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
    memset(pPage->aData, 0, 8);
    put4byte(&pPage1->aData[32], pPage->pgno);
    TRACE(("FREE-PAGE: %d first\n", pPage->pgno));
  }else{
    /* Other free pages already exist.  Retrive the first trunk page
    ** of the freelist and find out how many leaves it has. */
    MemPage *pTrunk;
    rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk);
    if( rc ) return rc;
    k = get4byte(&pTrunk->aData[4]);
    if( k>=pBt->usableSize/4 - 8 ){
      /* The trunk is full.  Turn the page being freed into a new
      ** trunk page with no leaves. */
      rc = sqlite3pager_write(pPage->aData);
      if( rc ) return rc;







|







3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
    memset(pPage->aData, 0, 8);
    put4byte(&pPage1->aData[32], pPage->pgno);
    TRACE(("FREE-PAGE: %d first\n", pPage->pgno));
  }else{
    /* Other free pages already exist.  Retrive the first trunk page
    ** of the freelist and find out how many leaves it has. */
    MemPage *pTrunk;
    rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk, 0);
    if( rc ) return rc;
    k = get4byte(&pTrunk->aData[4]);
    if( k>=pBt->usableSize/4 - 8 ){
      /* The trunk is full.  Turn the page being freed into a new
      ** trunk page with no leaves. */
      rc = sqlite3pager_write(pPage->aData);
      if( rc ) return rc;
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
  }
  ovflPgno = get4byte(&pCell[info.iOverflow]);
  while( ovflPgno!=0 ){
    MemPage *pOvfl;
    if( ovflPgno>sqlite3pager_pagecount(pBt->pPager) ){
      return SQLITE_CORRUPT_BKPT;
    }
    rc = getPage(pBt, ovflPgno, &pOvfl);
    if( rc ) return rc;
    ovflPgno = get4byte(pOvfl->aData);
    rc = freePage(pOvfl);
    sqlite3pager_unref(pOvfl->aData);
    if( rc ) return rc;
  }
  return SQLITE_OK;







|







3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
  }
  ovflPgno = get4byte(&pCell[info.iOverflow]);
  while( ovflPgno!=0 ){
    MemPage *pOvfl;
    if( ovflPgno>sqlite3pager_pagecount(pBt->pPager) ){
      return SQLITE_CORRUPT_BKPT;
    }
    rc = getPage(pBt, ovflPgno, &pOvfl, 0);
    if( rc ) return rc;
    ovflPgno = get4byte(pOvfl->aData);
    rc = freePage(pOvfl);
    sqlite3pager_unref(pOvfl->aData);
    if( rc ) return rc;
  }
  return SQLITE_OK;
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
      */
      if( pBt->autoVacuum && pgnoPtrmap!=0 && rc==SQLITE_OK ){
        rc = ptrmapPut(pBt, pgnoOvfl, PTRMAP_OVERFLOW2, pgnoPtrmap);
      }
#endif
      if( rc ){
        releasePage(pToRelease);
        /* clearCell(pPage, pCell); */
        return rc;
      }
      put4byte(pPrior, pgnoOvfl);
      releasePage(pToRelease);
      pToRelease = pOvfl;
      pPrior = pOvfl->aData;
      put4byte(pPrior, 0);







<







3971
3972
3973
3974
3975
3976
3977

3978
3979
3980
3981
3982
3983
3984
      */
      if( pBt->autoVacuum && pgnoPtrmap!=0 && rc==SQLITE_OK ){
        rc = ptrmapPut(pBt, pgnoOvfl, PTRMAP_OVERFLOW2, pgnoPtrmap);
      }
#endif
      if( rc ){
        releasePage(pToRelease);

        return rc;
      }
      put4byte(pPrior, pgnoOvfl);
      releasePage(pToRelease);
      pToRelease = pOvfl;
      pPrior = pOvfl->aData;
      put4byte(pPrior, 0);
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
    ** case, then do not do the transfer.  Leave page 1 empty except
    ** for the right-pointer to the child page.  The child page becomes
    ** the virtual root of the tree.
    */
    pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
    assert( pgnoChild>0 );
    assert( pgnoChild<=sqlite3pager_pagecount(pPage->pBt->pPager) );
    rc = getPage(pPage->pBt, pgnoChild, &pChild);
    if( rc ) goto end_shallow_balance;
    if( pPage->pgno==1 ){
      rc = initPage(pChild, pPage);
      if( rc ) goto end_shallow_balance;
      assert( pChild->nOverflow==0 );
      if( pChild->nFree>=100 ){
        /* The child information will fit on the root page, so do the







|







4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
    ** case, then do not do the transfer.  Leave page 1 empty except
    ** for the right-pointer to the child page.  The child page becomes
    ** the virtual root of the tree.
    */
    pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
    assert( pgnoChild>0 );
    assert( pgnoChild<=sqlite3pager_pagecount(pPage->pBt->pPager) );
    rc = getPage(pPage->pBt, pgnoChild, &pChild, 0);
    if( rc ) goto end_shallow_balance;
    if( pPage->pgno==1 ){
      rc = initPage(pChild, pPage);
      if( rc ) goto end_shallow_balance;
      assert( pChild->nOverflow==0 );
      if( pChild->nFree>=100 ){
        /* The child information will fit on the root page, so do the
5429
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464
    }

    if( pgnoMove!=pgnoRoot ){
      u8 eType;
      Pgno iPtrPage;

      releasePage(pPageMove);
      rc = getPage(pBt, pgnoRoot, &pRoot);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
      if( rc!=SQLITE_OK || eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
        releasePage(pRoot);
        return rc;
      }
      assert( eType!=PTRMAP_ROOTPAGE );
      assert( eType!=PTRMAP_FREEPAGE );
      rc = sqlite3pager_write(pRoot->aData);
      if( rc!=SQLITE_OK ){
        releasePage(pRoot);
        return rc;
      }
      rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove);
      releasePage(pRoot);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      rc = getPage(pBt, pgnoRoot, &pRoot);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      rc = sqlite3pager_write(pRoot->aData);
      if( rc!=SQLITE_OK ){
        releasePage(pRoot);
        return rc;







|




















|







5428
5429
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
    }

    if( pgnoMove!=pgnoRoot ){
      u8 eType;
      Pgno iPtrPage;

      releasePage(pPageMove);
      rc = getPage(pBt, pgnoRoot, &pRoot, 0);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
      if( rc!=SQLITE_OK || eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
        releasePage(pRoot);
        return rc;
      }
      assert( eType!=PTRMAP_ROOTPAGE );
      assert( eType!=PTRMAP_FREEPAGE );
      rc = sqlite3pager_write(pRoot->aData);
      if( rc!=SQLITE_OK ){
        releasePage(pRoot);
        return rc;
      }
      rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove);
      releasePage(pRoot);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      rc = getPage(pBt, pgnoRoot, &pRoot, 0);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      rc = sqlite3pager_write(pRoot->aData);
      if( rc!=SQLITE_OK ){
        releasePage(pRoot);
        return rc;
5508
5509
5510
5511
5512
5513
5514
5515
5516
5517
5518
5519
5520
5521
5522
5523
5524
5525
5526
5527
5528
5529
5530
5531
5532
5533
5534
5535
5536
5537
5538
5539

  if( pgno>sqlite3pager_pagecount(pBt->pPager) ){
    return SQLITE_CORRUPT_BKPT;
  }

  rc = getAndInitPage(pBt, pgno, &pPage, pParent);
  if( rc ) goto cleardatabasepage_out;
  rc = sqlite3pager_write(pPage->aData);
  if( rc ) goto cleardatabasepage_out;
  for(i=0; i<pPage->nCell; i++){
    pCell = findCell(pPage, i);
    if( !pPage->leaf ){
      rc = clearDatabasePage(pBt, get4byte(pCell), pPage->pParent, 1);
      if( rc ) goto cleardatabasepage_out;
    }
    rc = clearCell(pPage, pCell);
    if( rc ) goto cleardatabasepage_out;
  }
  if( !pPage->leaf ){
    rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage->pParent, 1);
    if( rc ) goto cleardatabasepage_out;
  }
  if( freePageFlag ){
    rc = freePage(pPage);
  }else{
    zeroPage(pPage, pPage->aData[0] | PTF_LEAF);
  }

cleardatabasepage_out:
  releasePage(pPage);
  return rc;
}







<
<















|







5507
5508
5509
5510
5511
5512
5513


5514
5515
5516
5517
5518
5519
5520
5521
5522
5523
5524
5525
5526
5527
5528
5529
5530
5531
5532
5533
5534
5535
5536

  if( pgno>sqlite3pager_pagecount(pBt->pPager) ){
    return SQLITE_CORRUPT_BKPT;
  }

  rc = getAndInitPage(pBt, pgno, &pPage, pParent);
  if( rc ) goto cleardatabasepage_out;


  for(i=0; i<pPage->nCell; i++){
    pCell = findCell(pPage, i);
    if( !pPage->leaf ){
      rc = clearDatabasePage(pBt, get4byte(pCell), pPage->pParent, 1);
      if( rc ) goto cleardatabasepage_out;
    }
    rc = clearCell(pPage, pCell);
    if( rc ) goto cleardatabasepage_out;
  }
  if( !pPage->leaf ){
    rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage->pParent, 1);
    if( rc ) goto cleardatabasepage_out;
  }
  if( freePageFlag ){
    rc = freePage(pPage);
  }else if( (rc = sqlite3pager_write(pPage->aData))==0 ){
    zeroPage(pPage, pPage->aData[0] | PTF_LEAF);
  }

cleardatabasepage_out:
  releasePage(pPage);
  return rc;
}
5601
5602
5603
5604
5605
5606
5607
5608
5609
5610
5611
5612
5613
5614
5615
  ** root page. If an open cursor was using this page a problem would 
  ** occur.
  */
  if( pBt->pCursor ){
    return SQLITE_LOCKED;
  }

  rc = getPage(pBt, (Pgno)iTable, &pPage);
  if( rc ) return rc;
  rc = sqlite3BtreeClearTable(p, iTable);
  if( rc ){
    releasePage(pPage);
    return rc;
  }








|







5598
5599
5600
5601
5602
5603
5604
5605
5606
5607
5608
5609
5610
5611
5612
  ** root page. If an open cursor was using this page a problem would 
  ** occur.
  */
  if( pBt->pCursor ){
    return SQLITE_LOCKED;
  }

  rc = getPage(pBt, (Pgno)iTable, &pPage, 0);
  if( rc ) return rc;
  rc = sqlite3BtreeClearTable(p, iTable);
  if( rc ){
    releasePage(pPage);
    return rc;
  }

5640
5641
5642
5643
5644
5645
5646
5647
5648
5649
5650
5651
5652
5653
5654
5655
5656
5657
5658
5659
5660
5661
5662
5663
      }else{
        /* The table being dropped does not have the largest root-page
        ** number in the database. So move the page that does into the 
        ** gap left by the deleted root-page.
        */
        MemPage *pMove;
        releasePage(pPage);
        rc = getPage(pBt, maxRootPgno, &pMove);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable);
        releasePage(pMove);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        rc = getPage(pBt, maxRootPgno, &pMove);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        rc = freePage(pMove);
        releasePage(pMove);
        if( rc!=SQLITE_OK ){
          return rc;







|








|







5637
5638
5639
5640
5641
5642
5643
5644
5645
5646
5647
5648
5649
5650
5651
5652
5653
5654
5655
5656
5657
5658
5659
5660
      }else{
        /* The table being dropped does not have the largest root-page
        ** number in the database. So move the page that does into the 
        ** gap left by the deleted root-page.
        */
        MemPage *pMove;
        releasePage(pPage);
        rc = getPage(pBt, maxRootPgno, &pMove, 0);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable);
        releasePage(pMove);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        rc = getPage(pBt, maxRootPgno, &pMove, 0);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        rc = freePage(pMove);
        releasePage(pMove);
        if( rc!=SQLITE_OK ){
          return rc;
5783
5784
5785
5786
5787
5788
5789
5790
5791
5792
5793
5794
5795
5796
5797
  int hdr;
  int nCell;
  int isInit;
  unsigned char *data;
  char range[20];
  unsigned char payload[20];

  rc = getPage(pBt, (Pgno)pgno, &pPage);
  isInit = pPage->isInit;
  if( pPage->isInit==0 ){
    initPage(pPage, pParent);
  }
  if( rc ){
    return rc;
  }







|







5780
5781
5782
5783
5784
5785
5786
5787
5788
5789
5790
5791
5792
5793
5794
  int hdr;
  int nCell;
  int isInit;
  unsigned char *data;
  char range[20];
  unsigned char payload[20];

  rc = getPage(pBt, (Pgno)pgno, &pPage, 0);
  isInit = pPage->isInit;
  if( pPage->isInit==0 ){
    initPage(pPage, pParent);
  }
  if( rc ){
    return rc;
  }
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175
6176
6177
6178

  /* Check that the page exists
  */
  pBt = pCheck->pBt;
  usableSize = pBt->usableSize;
  if( iPage==0 ) return 0;
  if( checkRef(pCheck, iPage, zParentContext) ) return 0;
  if( (rc = getPage(pBt, (Pgno)iPage, &pPage))!=0 ){
    checkAppendMsg(pCheck, zContext,
       "unable to get the page. error code=%d", rc);
    return 0;
  }
  if( (rc = initPage(pPage, pParent))!=0 ){
    checkAppendMsg(pCheck, zContext, "initPage() returns error code %d", rc);
    releasePage(pPage);







|







6161
6162
6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175

  /* Check that the page exists
  */
  pBt = pCheck->pBt;
  usableSize = pBt->usableSize;
  if( iPage==0 ) return 0;
  if( checkRef(pCheck, iPage, zParentContext) ) return 0;
  if( (rc = getPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
    checkAppendMsg(pCheck, zContext,
       "unable to get the page. error code=%d", rc);
    return 0;
  }
  if( (rc = initPage(pPage, pParent))!=0 ){
    checkAppendMsg(pCheck, zContext, "initPage() returns error code %d", rc);
    releasePage(pPage);
Changes to src/pager.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.284 2007/03/01 00:29:14 drh Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
#include "os.h"
#include "pager.h"
#include <assert.h>
#include <string.h>







|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.285 2007/03/04 13:15:28 drh Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
#include "os.h"
#include "pager.h"
#include <assert.h>
#include <string.h>
2621
2622
2623
2624
2625
2626
2627





2628
2629
2630
2631
2632
2633
2634
2635
2636
** See also sqlite3pager_lookup().  Both this routine and _lookup() attempt
** to find a page in the in-memory cache first.  If the page is not already
** in memory, this routine goes to disk to read it in whereas _lookup()
** just returns 0.  This routine acquires a read-lock the first time it
** has to go to disk, and could also playback an old journal if necessary.
** Since _lookup() never goes to disk, it never has to deal with locks
** or journal files.





*/
int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
  PgHdr *pPg;
  int rc;

  /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
  ** number greater than this, or zero, is requested.
  */
  if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){







>
>
>
>
>

|







2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
** See also sqlite3pager_lookup().  Both this routine and _lookup() attempt
** to find a page in the in-memory cache first.  If the page is not already
** in memory, this routine goes to disk to read it in whereas _lookup()
** just returns 0.  This routine acquires a read-lock the first time it
** has to go to disk, and could also playback an old journal if necessary.
** Since _lookup() never goes to disk, it never has to deal with locks
** or journal files.
**
** If clrFlag is false, the page contents are actually read from disk.
** If clfFlag is true, it means the page is about to be erased and
** rewritten without first being read so there is no point it doing
** the disk I/O.
*/
int sqlite3pager_acquire(Pager *pPager, Pgno pgno, void **ppPage, int clrFlag){
  PgHdr *pPg;
  int rc;

  /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
  ** number greater than this, or zero, is requested.
  */
  if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
2780
2781
2782
2783
2784
2785
2786
2787


2788
2789
2790
2791
2792
2793
2794
      rc = pPager->errCode;
      return rc;
    }

    /* Populate the page with data, either by reading from the database
    ** file, or by setting the entire page to zero.
    */
    if( sqlite3pager_pagecount(pPager)<(int)pgno || MEMDB ){


      memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
    }else{
      assert( MEMDB==0 );
      rc = sqlite3OsSeek(pPager->fd, (pgno-1)*(i64)pPager->pageSize);
      if( rc==SQLITE_OK ){
        rc = sqlite3OsRead(pPager->fd, PGHDR_TO_DATA(pPg),
                              pPager->pageSize);







|
>
>







2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
      rc = pPager->errCode;
      return rc;
    }

    /* Populate the page with data, either by reading from the database
    ** file, or by setting the entire page to zero.
    */
    if( sqlite3pager_pagecount(pPager)<(int)pgno || MEMDB
         || (clrFlag && !pPager->alwaysRollback) 
    ){
      memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
    }else{
      assert( MEMDB==0 );
      rc = sqlite3OsSeek(pPager->fd, (pgno-1)*(i64)pPager->pageSize);
      if( rc==SQLITE_OK ){
        rc = sqlite3OsRead(pPager->fd, PGHDR_TO_DATA(pPg),
                              pPager->pageSize);
Changes to src/pager.h.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.52 2006/11/06 21:20:26 drh Exp $
*/

#ifndef _PAGER_H_
#define _PAGER_H_

/*
** The default size of a database page.







|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.53 2007/03/04 13:15:28 drh Exp $
*/

#ifndef _PAGER_H_
#define _PAGER_H_

/*
** The default size of a database page.
74
75
76
77
78
79
80
81

82
83
84
85
86
87
88
void sqlite3pager_set_busyhandler(Pager*, BusyHandler *pBusyHandler);
void sqlite3pager_set_destructor(Pager*, void(*)(void*,int));
void sqlite3pager_set_reiniter(Pager*, void(*)(void*,int));
int sqlite3pager_set_pagesize(Pager*, int);
int sqlite3pager_read_fileheader(Pager*, int, unsigned char*);
void sqlite3pager_set_cachesize(Pager*, int);
int sqlite3pager_close(Pager *pPager);
int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage);

void *sqlite3pager_lookup(Pager *pPager, Pgno pgno);
int sqlite3pager_ref(void*);
int sqlite3pager_unref(void*);
Pgno sqlite3pager_pagenumber(void*);
int sqlite3pager_write(void*);
int sqlite3pager_iswriteable(void*);
int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void*);







|
>







74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
void sqlite3pager_set_busyhandler(Pager*, BusyHandler *pBusyHandler);
void sqlite3pager_set_destructor(Pager*, void(*)(void*,int));
void sqlite3pager_set_reiniter(Pager*, void(*)(void*,int));
int sqlite3pager_set_pagesize(Pager*, int);
int sqlite3pager_read_fileheader(Pager*, int, unsigned char*);
void sqlite3pager_set_cachesize(Pager*, int);
int sqlite3pager_close(Pager *pPager);
int sqlite3pager_acquire(Pager *pPager, Pgno pgno, void **ppPage, int clrFlag);
#define sqlite3pager_get(A,B,C) sqlite3pager_acquire(A,B,C,0)
void *sqlite3pager_lookup(Pager *pPager, Pgno pgno);
int sqlite3pager_ref(void*);
int sqlite3pager_unref(void*);
Pgno sqlite3pager_pagenumber(void*);
int sqlite3pager_write(void*);
int sqlite3pager_iswriteable(void*);
int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void*);