SQLite

Check-in [6b998f3066]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Avoid parsing the structure of b-tree pages (in sqlite3BtreeInitPage) more than is necessary. (CVS 5720)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 6b998f3066754e219c266501327e5578c9406b63
User & Date: danielk1977 2008-09-18 17:34:44.000
Context
2008-09-18
18:17
Fix a segfault introduced by (5720) that can follow an IO error. (CVS 5721) (check-in: 05d3462688 user: danielk1977 tags: trunk)
17:34
Avoid parsing the structure of b-tree pages (in sqlite3BtreeInitPage) more than is necessary. (CVS 5720) (check-in: 6b998f3066 user: danielk1977 tags: trunk)
13:49
Update the version number to 3.6.3. (CVS 5719) (check-in: 419764b35c user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/btree.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.512 2008/09/18 01:08:16 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** Including a description of file format and an overview of operation.
*/
#include "btreeInt.h"












|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.513 2008/09/18 17:34:44 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** Including a description of file format and an overview of operation.
*/
#include "btreeInt.h"

937
938
939
940
941
942
943

944
945
946
947
948
949
950
951
952

953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994

995
996
997
998
999
1000
1001
  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
  assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
  assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
  if( pPage==pParent ){
    return SQLITE_CORRUPT_BKPT;
  }

  if( pPage->pParent!=pParent && (pPage->pParent!=0 || pPage->isInit) ){
    /* The parent page should never change unless the file is corrupt */
    return SQLITE_CORRUPT_BKPT;
  }
  if( pPage->isInit ) return SQLITE_OK;
  if( pPage->pParent==0 && pParent!=0 ){
    pPage->pParent = pParent;
    sqlite3PagerRef(pParent->pDbPage);
  }

  hdr = pPage->hdrOffset;
  data = pPage->aData;
  if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT;
  assert( pBt->pageSize>=512 && pBt->pageSize<=32768 );
  pPage->maskPage = pBt->pageSize - 1;
  pPage->nOverflow = 0;
  pPage->idxShift = 0;
  usableSize = pBt->usableSize;
  pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
  top = get2byte(&data[hdr+5]);
  pPage->nCell = get2byte(&data[hdr+3]);
  if( pPage->nCell>MX_CELL(pBt) ){
    /* To many cells for a single page.  The page must be corrupt */
    return SQLITE_CORRUPT_BKPT;
  }
  if( pPage->nCell==0 && pParent!=0 && pParent->pgno!=1 ){
    /* All pages must have at least one cell, except for root pages */
    return SQLITE_CORRUPT_BKPT;
  }

  /* Compute the total free space on the page */
  pc = get2byte(&data[hdr+1]);
  nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell);
  while( pc>0 ){
    int next, size;
    if( pc>usableSize-4 ){
      /* Free block is off the page */
      return SQLITE_CORRUPT_BKPT; 
    }
    next = get2byte(&data[pc]);
    size = get2byte(&data[pc+2]);
    if( next>0 && next<=pc+size+3 ){
      /* Free blocks must be in accending order */
      return SQLITE_CORRUPT_BKPT; 
    }
    nFree += size;
    pc = next;
  }
  pPage->nFree = nFree;
  if( nFree>=usableSize ){
    /* Free space cannot exceed total page size */
    return SQLITE_CORRUPT_BKPT; 

  }

#if 0
  /* Check that all the offsets in the cell offset array are within range. 
  ** 
  ** Omitting this consistency check and using the pPage->maskPage mask
  ** to prevent overrunning the page buffer in findCell() results in a







>
|



|
|



>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>







937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
  assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
  assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
  if( pPage==pParent ){
    return SQLITE_CORRUPT_BKPT;
  }
  if( (pPage->pParent!=pParent)
   && (pPage->pParent!=0 || pPage->isInit==PAGE_ISINIT_FULL) ){
    /* The parent page should never change unless the file is corrupt */
    return SQLITE_CORRUPT_BKPT;
  }
  if( pPage->isInit==PAGE_ISINIT_FULL ) return SQLITE_OK;
  if( pParent!=0 ){
    pPage->pParent = pParent;
    sqlite3PagerRef(pParent->pDbPage);
  }
  if( pPage->isInit==PAGE_ISINIT_NONE ){
    hdr = pPage->hdrOffset;
    data = pPage->aData;
    if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT;
    assert( pBt->pageSize>=512 && pBt->pageSize<=32768 );
    pPage->maskPage = pBt->pageSize - 1;
    pPage->nOverflow = 0;
    pPage->idxShift = 0;
    usableSize = pBt->usableSize;
    pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
    top = get2byte(&data[hdr+5]);
    pPage->nCell = get2byte(&data[hdr+3]);
    if( pPage->nCell>MX_CELL(pBt) ){
      /* To many cells for a single page.  The page must be corrupt */
      return SQLITE_CORRUPT_BKPT;
    }
    if( pPage->nCell==0 && pParent!=0 && pParent->pgno!=1 ){
      /* All pages must have at least one cell, except for root pages */
      return SQLITE_CORRUPT_BKPT;
    }
  
    /* Compute the total free space on the page */
    pc = get2byte(&data[hdr+1]);
    nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell);
    while( pc>0 ){
      int next, size;
      if( pc>usableSize-4 ){
        /* Free block is off the page */
        return SQLITE_CORRUPT_BKPT; 
      }
      next = get2byte(&data[pc]);
      size = get2byte(&data[pc+2]);
      if( next>0 && next<=pc+size+3 ){
        /* Free blocks must be in accending order */
        return SQLITE_CORRUPT_BKPT; 
      }
      nFree += size;
      pc = next;
    }
    pPage->nFree = nFree;
    if( nFree>=usableSize ){
      /* Free space cannot exceed total page size */
      return SQLITE_CORRUPT_BKPT; 
    }
  }

#if 0
  /* Check that all the offsets in the cell offset array are within range. 
  ** 
  ** Omitting this consistency check and using the pPage->maskPage mask
  ** to prevent overrunning the page buffer in findCell() results in a
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
    for(pOff=&data[cellOffset]; pOff!=pEnd && !((*pOff)&mask); pOff+=2);
    if( pOff!=pEnd ){
      return SQLITE_CORRUPT_BKPT;
    }
  }
#endif

  pPage->isInit = 1;
  return SQLITE_OK;
}

/*
** Set up a raw page so that it looks like a database page holding
** no entries.
*/







|







1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
    for(pOff=&data[cellOffset]; pOff!=pEnd && !((*pOff)&mask); pOff+=2);
    if( pOff!=pEnd ){
      return SQLITE_CORRUPT_BKPT;
    }
  }
#endif

  pPage->isInit = PAGE_ISINIT_FULL;
  return SQLITE_OK;
}

/*
** Set up a raw page so that it looks like a database page holding
** no entries.
*/
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
  pPage->hdrOffset = hdr;
  pPage->cellOffset = first;
  pPage->nOverflow = 0;
  assert( pBt->pageSize>=512 && pBt->pageSize<=32768 );
  pPage->maskPage = pBt->pageSize - 1;
  pPage->idxShift = 0;
  pPage->nCell = 0;
  pPage->isInit = 1;
}


/*
** Convert a DbPage obtained from the pager into a MemPage used by
** the btree layer.
*/







|







1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
  pPage->hdrOffset = hdr;
  pPage->cellOffset = first;
  pPage->nOverflow = 0;
  assert( pBt->pageSize>=512 && pBt->pageSize<=32768 );
  pPage->maskPage = pBt->pageSize - 1;
  pPage->idxShift = 0;
  pPage->nCell = 0;
  pPage->isInit = PAGE_ISINIT_FULL;
}


/*
** Convert a DbPage obtained from the pager into a MemPage used by
** the btree layer.
*/
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
  MemPage *pParent     /* Parent of the page */
){
  int rc;
  DbPage *pDbPage;
  MemPage *pPage;

  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( !pParent || pParent->isInit );
  if( pgno==0 ){
    return SQLITE_CORRUPT_BKPT; 
  }

  /* It is often the case that the page we want is already in cache.
  ** If so, get it directly.  This saves us from having to call
  ** pagerPagecount() to make sure pgno is within limits, which results







|







1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
  MemPage *pParent     /* Parent of the page */
){
  int rc;
  DbPage *pDbPage;
  MemPage *pPage;

  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( !pParent || pParent->isInit==PAGE_ISINIT_FULL );
  if( pgno==0 ){
    return SQLITE_CORRUPT_BKPT; 
  }

  /* It is often the case that the page we want is already in cache.
  ** If so, get it directly.  This saves us from having to call
  ** pagerPagecount() to make sure pgno is within limits, which results
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
    if( pgno>pagerPagecount(pBt->pPager) ){
      return SQLITE_CORRUPT_BKPT; 
    }
    rc = sqlite3BtreeGetPage(pBt, pgno, ppPage, 0);
    if( rc ) return rc;
    pPage = *ppPage;
  }
  if( pPage->isInit==0 ){
     rc = sqlite3BtreeInitPage(pPage, pParent);
  }else if( pParent && (pPage==pParent || pPage->pParent!=pParent) ){
    /* This condition indicates a loop in the b-tree structure (the scenario
    ** where database corruption has caused a page to be a direct or
    ** indirect descendant of itself).
    */ 
    rc = SQLITE_CORRUPT_BKPT;







|







1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
    if( pgno>pagerPagecount(pBt->pPager) ){
      return SQLITE_CORRUPT_BKPT; 
    }
    rc = sqlite3BtreeGetPage(pBt, pgno, ppPage, 0);
    if( rc ) return rc;
    pPage = *ppPage;
  }
  if( pPage->isInit!=PAGE_ISINIT_FULL ){
     rc = sqlite3BtreeInitPage(pPage, pParent);
  }else if( pParent && (pPage==pParent || pPage->pParent!=pParent) ){
    /* This condition indicates a loop in the b-tree structure (the scenario
    ** where database corruption has caused a page to be a direct or
    ** indirect descendant of itself).
    */ 
    rc = SQLITE_CORRUPT_BKPT;
1180
1181
1182
1183
1184
1185
1186

1187

1188
1189
1190
1191
1192
1193

1194

1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213


1214
1215
1216
1217
1218
1219
1220
** reaches zero.  We need to unref the pParent pointer when that
** happens.
*/
static void pageDestructor(DbPage *pData){
  MemPage *pPage;
  pPage = (MemPage *)sqlite3PagerGetExtra(pData);
  if( pPage ){

    assert( pPage->isInit==0 || sqlite3_mutex_held(pPage->pBt->mutex) );

    if( pPage->pParent ){
      MemPage *pParent = pPage->pParent;
      assert( pParent->pBt==pPage->pBt );
      pPage->pParent = 0;
      releasePage(pParent);
    }

    pPage->isInit = 0;

  }
}

/*
** During a rollback, when the pager reloads information into the cache
** so that the cache is restored to its original state at the start of
** the transaction, for each page restored this routine is called.
**
** This routine needs to reset the extra data section at the end of the
** page to agree with the restored data.
*/
static void pageReinit(DbPage *pData, int pageSize){
  MemPage *pPage;
  assert( (pageSize & 7)==0 );
  pPage = (MemPage *)sqlite3PagerGetExtra(pData);
  if( pPage->isInit ){
    assert( sqlite3_mutex_held(pPage->pBt->mutex) );
    pPage->isInit = 0;
    sqlite3BtreeInitPage(pPage, pPage->pParent);


  }
}

/*
** Invoke the busy handler for a btree.
*/
static int sqlite3BtreeInvokeBusyHandler(void *pArg, int n){







>
|
>






>
|
>











|

<

|



>
>







1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214

1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
** reaches zero.  We need to unref the pParent pointer when that
** happens.
*/
static void pageDestructor(DbPage *pData){
  MemPage *pPage;
  pPage = (MemPage *)sqlite3PagerGetExtra(pData);
  if( pPage ){
    assert( pPage->isInit!=PAGE_ISINIT_FULL 
         || sqlite3_mutex_held(pPage->pBt->mutex) 
    );
    if( pPage->pParent ){
      MemPage *pParent = pPage->pParent;
      assert( pParent->pBt==pPage->pBt );
      pPage->pParent = 0;
      releasePage(pParent);
    }
    if( pPage->isInit==PAGE_ISINIT_FULL ){
      pPage->isInit = PAGE_ISINIT_DATA;
    }
  }
}

/*
** During a rollback, when the pager reloads information into the cache
** so that the cache is restored to its original state at the start of
** the transaction, for each page restored this routine is called.
**
** This routine needs to reset the extra data section at the end of the
** page to agree with the restored data.
*/
static void pageReinit(DbPage *pData){
  MemPage *pPage;

  pPage = (MemPage *)sqlite3PagerGetExtra(pData);
  if( pPage->isInit==PAGE_ISINIT_FULL ){
    assert( sqlite3_mutex_held(pPage->pBt->mutex) );
    pPage->isInit = 0;
    sqlite3BtreeInitPage(pPage, pPage->pParent);
  }else if( pPage->isInit==PAGE_ISINIT_DATA ){
    pPage->isInit = 0;
  }
}

/*
** Invoke the busy handler for a btree.
*/
static int sqlite3BtreeInvokeBusyHandler(void *pArg, int n){
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
    if( pCur->eState==CURSOR_FAULT ){
      return pCur->skip;
    }
    clearCursorPosition(pCur);
  }
  pRoot = pCur->pPage;
  if( pRoot && pRoot->pgno==pCur->pgnoRoot ){
    assert( pRoot->isInit );
  }else{
    if( 
      SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pRoot, 0))
    ){
      pCur->eState = CURSOR_INVALID;
      return rc;
    }







|







3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
    if( pCur->eState==CURSOR_FAULT ){
      return pCur->skip;
    }
    clearCursorPosition(pCur);
  }
  pRoot = pCur->pPage;
  if( pRoot && pRoot->pgno==pCur->pgnoRoot ){
    assert( pRoot->isInit==PAGE_ISINIT_FULL );
  }else{
    if( 
      SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pRoot, 0))
    ){
      pCur->eState = CURSOR_INVALID;
      return rc;
    }
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753


  rc = moveToRoot(pCur);
  if( rc ){
    return rc;
  }
  assert( pCur->pPage );
  assert( pCur->pPage->isInit );
  if( pCur->eState==CURSOR_INVALID ){
    *pRes = -1;
    assert( pCur->pPage->nCell==0 );
    return SQLITE_OK;
  }
  assert( pCur->pPage->intKey || pIdxKey );
  for(;;){







|







3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761


  rc = moveToRoot(pCur);
  if( rc ){
    return rc;
  }
  assert( pCur->pPage );
  assert( pCur->pPage->isInit==PAGE_ISINIT_FULL );
  if( pCur->eState==CURSOR_INVALID ){
    *pRes = -1;
    assert( pCur->pPage->nCell==0 );
    return SQLITE_OK;
  }
  assert( pCur->pPage->intKey || pIdxKey );
  for(;;){
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
      if( lwr>upr ){
        pCur->info.nKey = nCellKey;
        break;
      }
      pCur->idx = (lwr+upr)/2;
    }
    assert( lwr==upr+1 );
    assert( pPage->isInit );
    if( pPage->leaf ){
      chldPg = 0;
    }else if( lwr>=pPage->nCell ){
      chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
    }else{
      chldPg = get4byte(findCell(pPage, lwr));
    }







|







3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
      if( lwr>upr ){
        pCur->info.nKey = nCellKey;
        break;
      }
      pCur->idx = (lwr+upr)/2;
    }
    assert( lwr==upr+1 );
    assert( pPage->isInit==PAGE_ISINIT_FULL );
    if( pPage->leaf ){
      chldPg = 0;
    }else if( lwr>=pPage->nCell ){
      chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
    }else{
      chldPg = get4byte(findCell(pPage, lwr));
    }
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
  if( pCur->skip>0 ){
    pCur->skip = 0;
    *pRes = 0;
    return SQLITE_OK;
  }
  pCur->skip = 0;

  assert( pPage->isInit );
  assert( pCur->idx<pPage->nCell );

  pCur->idx++;
  pCur->info.nSize = 0;
  pCur->validNKey = 0;
  if( pCur->idx>=pPage->nCell ){
    if( !pPage->leaf ){







|







3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
  if( pCur->skip>0 ){
    pCur->skip = 0;
    *pRes = 0;
    return SQLITE_OK;
  }
  pCur->skip = 0;

  assert( pPage->isInit==PAGE_ISINIT_FULL );
  assert( pCur->idx<pPage->nCell );

  pCur->idx++;
  pCur->info.nSize = 0;
  pCur->validNKey = 0;
  if( pCur->idx>=pPage->nCell ){
    if( !pPage->leaf ){
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
    pCur->skip = 0;
    *pRes = 0;
    return SQLITE_OK;
  }
  pCur->skip = 0;

  pPage = pCur->pPage;
  assert( pPage->isInit );
  assert( pCur->idx>=0 );
  if( !pPage->leaf ){
    pgno = get4byte( findCell(pPage, pCur->idx) );
    rc = moveToChild(pCur, pgno);
    if( rc ){
      return rc;
    }







|







4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
    pCur->skip = 0;
    *pRes = 0;
    return SQLITE_OK;
  }
  pCur->skip = 0;

  pPage = pCur->pPage;
  assert( pPage->isInit==PAGE_ISINIT_FULL );
  assert( pCur->idx>=0 );
  if( !pPage->leaf ){
    pgno = get4byte( findCell(pPage, pCur->idx) );
    rc = moveToChild(pCur, pgno);
    if( rc ){
      return rc;
    }
4291
4292
4293
4294
4295
4296
4297







4298
4299
4300
4301
4302
4303
4304
  }

  assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );

end_allocate_page:
  releasePage(pTrunk);
  releasePage(pPrevTrunk);







  return rc;
}

/*
** Add a page of the database file to the freelist.
**
** sqlite3PagerUnref() is NOT called for pPage.







>
>
>
>
>
>
>







4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
  }

  assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );

end_allocate_page:
  releasePage(pTrunk);
  releasePage(pPrevTrunk);
  if( rc==SQLITE_OK ){
    if( (*ppPage)->isInit==PAGE_ISINIT_FULL ){
      releasePage(*ppPage);
      return SQLITE_CORRUPT_BKPT;
    }
    (*ppPage)->isInit = 0;
  }
  return rc;
}

/*
** Add a page of the database file to the freelist.
**
** sqlite3PagerUnref() is NOT called for pPage.
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( pNewParent!=0 );
  if( pgno==0 ) return SQLITE_OK;
  assert( pBt->pPager!=0 );
  pDbPage = sqlite3PagerLookup(pBt->pPager, pgno);
  if( pDbPage ){
    pThis = (MemPage *)sqlite3PagerGetExtra(pDbPage);
    if( pThis->isInit ){
      assert( pThis->aData==sqlite3PagerGetData(pDbPage) );
      if( pThis->pParent!=pNewParent ){
        if( pThis->pParent ) sqlite3PagerUnref(pThis->pParent->pDbPage);
        pThis->pParent = pNewParent;
        sqlite3PagerRef(pNewParent->pDbPage);
      }
      pThis->idxParent = idx;







|







4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( pNewParent!=0 );
  if( pgno==0 ) return SQLITE_OK;
  assert( pBt->pPager!=0 );
  pDbPage = sqlite3PagerLookup(pBt->pPager, pgno);
  if( pDbPage ){
    pThis = (MemPage *)sqlite3PagerGetExtra(pDbPage);
    if( pThis->isInit==PAGE_ISINIT_FULL ){
      assert( pThis->aData==sqlite3PagerGetData(pDbPage) );
      if( pThis->pParent!=pNewParent ){
        if( pThis->pParent ) sqlite3PagerUnref(pThis->pParent->pDbPage);
        pThis->pParent = pNewParent;
        sqlite3PagerRef(pNewParent->pDbPage);
      }
      pThis->idxParent = idx;
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946

















4947
4948
4949
4950
4951

4952


4953
4954
4955
4956
4957
4958
4959
**
** pPage is the leaf page which is the right-most page in the tree.
** pParent is its parent.  pPage must have a single overflow entry
** which is also the right-most entry on the page.
*/
static int balance_quick(MemPage *pPage, MemPage *pParent){
  int rc;
  MemPage *pNew;
  Pgno pgnoNew;
  u8 *pCell;
  u16 szCell;
  CellInfo info;
  BtShared *pBt = pPage->pBt;
  int parentIdx = pParent->nCell;   /* pParent new divider cell index */
  int parentSize;                   /* Size of new divider cell */
  u8 parentCell[64];                /* Space for the new divider cell */

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );

  /* Allocate a new page. Insert the overflow cell from pPage
  ** into it. Then remove the overflow cell from pPage.
  */
  rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pCell = pPage->aOvfl[0].pCell;
  szCell = cellSizePtr(pPage, pCell);
  zeroPage(pNew, pPage->aData[0]);
  assemblePage(pNew, 1, &pCell, &szCell);
  pPage->nOverflow = 0;

  /* Set the parent of the newly allocated page to pParent. */
  pNew->pParent = pParent;
  sqlite3PagerRef(pParent->pDbPage);

  /* pPage is currently the right-child of pParent. Change this
  ** so that the right-child is the new page allocated above and
  ** pPage is the next-to-right child. 
  **
  ** Ignore the return value of the call to fillInCell(). fillInCell()
  ** may only return other than SQLITE_OK if it is required to allocate
  ** one or more overflow pages. Since an internal table B-Tree cell 
  ** may never spill over onto an overflow page (it is a maximum of 
  ** 13 bytes in size), it is not neccessary to check the return code.
  **
  ** Similarly, the insertCell() function cannot fail if the page
  ** being inserted into is already writable and the cell does not 
  ** contain an overflow pointer. So ignore this return code too.
  */
  assert( pPage->nCell>0 );
  pCell = findCell(pPage, pPage->nCell-1);
  sqlite3BtreeParseCellPtr(pPage, pCell, &info);
  fillInCell(pParent, parentCell, 0, info.nKey, 0, 0, 0, &parentSize);
  assert( parentSize<64 );
  assert( sqlite3PagerIswriteable(pParent->pDbPage) );
  insertCell(pParent, parentIdx, parentCell, parentSize, 0, 4);
  put4byte(findOverflowCell(pParent,parentIdx), pPage->pgno);
  put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);

  /* If this is an auto-vacuum database, update the pointer map
  ** with entries for the new page, and any pointer from the 
  ** cell on the page to an overflow page.
  */
  if( ISAUTOVACUUM ){
    rc = ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno);
    if( rc==SQLITE_OK ){
      rc = ptrmapPutOvfl(pNew, 0);
    }
    if( rc!=SQLITE_OK ){
      releasePage(pNew);
      return rc;
    }
  }


















  /* Release the reference to the new page and balance the parent page,
  ** in case the divider cell inserted caused it to become overfull.
  */
  releasePage(pNew);

  return balance(pParent, 0);


}
#endif /* SQLITE_OMIT_QUICKBALANCE */

/*
** This routine redistributes Cells on pPage and up to NN*2 siblings
** of pPage so that all pages have about the same amount of free space.
** Usually NN siblings on either side of pPage is used in the balancing,







|















|
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
<
<


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





>
|
>
>







4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911


4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954



4955
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
**
** pPage is the leaf page which is the right-most page in the tree.
** pParent is its parent.  pPage must have a single overflow entry
** which is also the right-most entry on the page.
*/
static int balance_quick(MemPage *pPage, MemPage *pParent){
  int rc;
  MemPage *pNew = 0;
  Pgno pgnoNew;
  u8 *pCell;
  u16 szCell;
  CellInfo info;
  BtShared *pBt = pPage->pBt;
  int parentIdx = pParent->nCell;   /* pParent new divider cell index */
  int parentSize;                   /* Size of new divider cell */
  u8 parentCell[64];                /* Space for the new divider cell */

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );

  /* Allocate a new page. Insert the overflow cell from pPage
  ** into it. Then remove the overflow cell from pPage.
  */
  rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
  if( rc==SQLITE_OK ){


    pCell = pPage->aOvfl[0].pCell;
    szCell = cellSizePtr(pPage, pCell);
    zeroPage(pNew, pPage->aData[0]);
    assemblePage(pNew, 1, &pCell, &szCell);
    pPage->nOverflow = 0;
  
    /* Set the parent of the newly allocated page to pParent. */
    pNew->pParent = pParent;
    sqlite3PagerRef(pParent->pDbPage);
  
    /* pPage is currently the right-child of pParent. Change this
    ** so that the right-child is the new page allocated above and
    ** pPage is the next-to-right child. 
    **
    ** Ignore the return value of the call to fillInCell(). fillInCell()
    ** may only return other than SQLITE_OK if it is required to allocate
    ** one or more overflow pages. Since an internal table B-Tree cell 
    ** may never spill over onto an overflow page (it is a maximum of 
    ** 13 bytes in size), it is not neccessary to check the return code.
    **
    ** Similarly, the insertCell() function cannot fail if the page
    ** being inserted into is already writable and the cell does not 
    ** contain an overflow pointer. So ignore this return code too.
    */
    assert( pPage->nCell>0 );
    pCell = findCell(pPage, pPage->nCell-1);
    sqlite3BtreeParseCellPtr(pPage, pCell, &info);
    fillInCell(pParent, parentCell, 0, info.nKey, 0, 0, 0, &parentSize);
    assert( parentSize<64 );
    assert( sqlite3PagerIswriteable(pParent->pDbPage) );
    insertCell(pParent, parentIdx, parentCell, parentSize, 0, 4);
    put4byte(findOverflowCell(pParent,parentIdx), pPage->pgno);
    put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);
  
    /* If this is an auto-vacuum database, update the pointer map
    ** with entries for the new page, and any pointer from the 
    ** cell on the page to an overflow page.
    */
    if( ISAUTOVACUUM ){
      rc = ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno);
      if( rc==SQLITE_OK ){
        rc = ptrmapPutOvfl(pNew, 0);
      }



    }
  }

  /* At this point the pPage->nFree variable is not set correctly with
  ** respect to the content of the page (because it was set to 0 by 
  ** insertCell). So call sqlite3BtreeInitPage() to make sure it is
  ** correct.
  **
  ** This has to be done even if an error will be returned. Normally, if
  ** an error occurs during tree balancing, the contents of MemPage are
  ** not important, as they will be recalculated when the page is rolled
  ** back. But here, in balance_quick(), it is possible that pPage has 
  ** not yet been marked dirty or written into the journal file. Therefore
  ** it will not be rolled back and so it is important to make sure that
  ** the page data and contents of MemPage are consistent.
  */
  pPage->isInit = 0;
  sqlite3BtreeInitPage(pPage, pPage->pParent);
  sqlite3PagerUnref(pPage->pParent->pDbPage);

  /* Release the reference to the new page and balance the parent page,
  ** in case the divider cell inserted caused it to become overfull.
  */
  releasePage(pNew);
  if( rc==SQLITE_OK ){
    rc = balance(pParent, 0);
  }
  return rc;
}
#endif /* SQLITE_OMIT_QUICKBALANCE */

/*
** This routine redistributes Cells on pPage and up to NN*2 siblings
** of pPage so that all pages have about the same amount of free space.
** Usually NN siblings on either side of pPage is used in the balancing,
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
  u8 *aFrom = 0;

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );

  /* 
  ** Find the parent page.
  */
  assert( pPage->isInit );
  assert( sqlite3PagerIswriteable(pPage->pDbPage) || pPage->nOverflow==1 );
  pBt = pPage->pBt;
  pParent = pPage->pParent;
  assert( pParent );
  if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){
    return rc;
  }







|







5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
  u8 *aFrom = 0;

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );

  /* 
  ** Find the parent page.
  */
  assert( pPage->isInit==PAGE_ISINIT_FULL );
  assert( sqlite3PagerIswriteable(pPage->pDbPage) || pPage->nOverflow==1 );
  pBt = pPage->pBt;
  pParent = pPage->pParent;
  assert( pParent );
  if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){
    return rc;
  }
5550
5551
5552
5553
5554
5555
5556
5557
5558
5559
5560
5561
5562
5563
5564
5565
5566
5567
5568
5569
5570
5571
5572
5573

5574
5575
5576

5577
5578
5579
5580
5581
5582
5583
  if( rc!=SQLITE_OK ) goto balance_cleanup;

  /*
  ** Balance the parent page.  Note that the current page (pPage) might
  ** have been added to the freelist so it might no longer be initialized.
  ** But the parent page will always be initialized.
  */
  assert( pParent->isInit );
  sqlite3ScratchFree(apCell);
  apCell = 0;
  rc = balance(pParent, 0);
  
  /*
  ** Cleanup before returning.
  */
balance_cleanup:
  sqlite3PageFree(aSpace2);
  sqlite3ScratchFree(apCell);
  for(i=0; i<nOld; i++){
    releasePage(apOld[i]);
  }
  for(i=0; i<nNew; i++){
    releasePage(apNew[i]);
  }

  releasePage(pParent);
  TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
          pPage->pgno, nOld, nNew, nCell));

  return rc;
}

/*
** This routine is called for the root page of a btree when the root
** page contains no cells.  This is an opportunity to make the tree
** shallower by one level.







|
















>



>







5580
5581
5582
5583
5584
5585
5586
5587
5588
5589
5590
5591
5592
5593
5594
5595
5596
5597
5598
5599
5600
5601
5602
5603
5604
5605
5606
5607
5608
5609
5610
5611
5612
5613
5614
5615
  if( rc!=SQLITE_OK ) goto balance_cleanup;

  /*
  ** Balance the parent page.  Note that the current page (pPage) might
  ** have been added to the freelist so it might no longer be initialized.
  ** But the parent page will always be initialized.
  */
  assert( pParent->isInit==PAGE_ISINIT_FULL );
  sqlite3ScratchFree(apCell);
  apCell = 0;
  rc = balance(pParent, 0);
  
  /*
  ** Cleanup before returning.
  */
balance_cleanup:
  sqlite3PageFree(aSpace2);
  sqlite3ScratchFree(apCell);
  for(i=0; i<nOld; i++){
    releasePage(apOld[i]);
  }
  for(i=0; i<nNew; i++){
    releasePage(apNew[i]);
  }

  releasePage(pParent);
  TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
          pPage->pgno, nOld, nNew, nCell));

  return rc;
}

/*
** This routine is called for the root page of a btree when the root
** page contains no cells.  This is an opportunity to make the tree
** shallower by one level.
5703
5704
5705
5706
5707
5708
5709
5710
5711
5712
5713
5714
5715
5716
5717
  usableSize = pBt->usableSize;
  data = pPage->aData;
  hdr = pPage->hdrOffset;
  cbrk = get2byte(&data[hdr+5]);
  cdata = pChild->aData;
  memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr);
  memcpy(&cdata[cbrk], &data[cbrk], usableSize-cbrk);
  if( pChild->isInit ) return SQLITE_CORRUPT;
  rc = sqlite3BtreeInitPage(pChild, pPage);
  if( rc ) goto balancedeeper_out;
  memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0]));
  pChild->nOverflow = pPage->nOverflow;
  if( pChild->nOverflow ){
    pChild->nFree = 0;
  }







|







5735
5736
5737
5738
5739
5740
5741
5742
5743
5744
5745
5746
5747
5748
5749
  usableSize = pBt->usableSize;
  data = pPage->aData;
  hdr = pPage->hdrOffset;
  cbrk = get2byte(&data[hdr+5]);
  cdata = pChild->aData;
  memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr);
  memcpy(&cdata[cbrk], &data[cbrk], usableSize-cbrk);
  if( pChild->isInit==PAGE_ISINIT_FULL ) return SQLITE_CORRUPT;
  rc = sqlite3BtreeInitPage(pChild, pPage);
  if( rc ) goto balancedeeper_out;
  memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0]));
  pChild->nOverflow = pPage->nOverflow;
  if( pChild->nOverflow ){
    pChild->nFree = 0;
  }
5886
5887
5888
5889
5890
5891
5892
5893
5894
5895
5896
5897
5898
5899
5900

  pPage = pCur->pPage;
  assert( pPage->intKey || nKey>=0 );
  assert( pPage->leaf || !pPage->intKey );
  TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
          pCur->pgnoRoot, nKey, nData, pPage->pgno,
          loc==0 ? "overwrite" : "new entry"));
  assert( pPage->isInit );
  allocateTempSpace(pBt);
  newCell = pBt->pTmpSpace;
  if( newCell==0 ) return SQLITE_NOMEM;
  rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
  if( rc ) goto end_insert;
  assert( szNew==cellSizePtr(pPage, newCell) );
  assert( szNew<=MX_CELL_SIZE(pBt) );







|







5918
5919
5920
5921
5922
5923
5924
5925
5926
5927
5928
5929
5930
5931
5932

  pPage = pCur->pPage;
  assert( pPage->intKey || nKey>=0 );
  assert( pPage->leaf || !pPage->intKey );
  TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
          pCur->pgnoRoot, nKey, nData, pPage->pgno,
          loc==0 ? "overwrite" : "new entry"));
  assert( pPage->isInit==PAGE_ISINIT_FULL );
  allocateTempSpace(pBt);
  newCell = pBt->pTmpSpace;
  if( newCell==0 ) return SQLITE_NOMEM;
  rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
  if( rc ) goto end_insert;
  assert( szNew==cellSizePtr(pPage, newCell) );
  assert( szNew<=MX_CELL_SIZE(pBt) );
5940
5941
5942
5943
5944
5945
5946
5947
5948
5949
5950
5951
5952
5953
5954
  unsigned char *pCell;
  int rc;
  Pgno pgnoChild = 0;
  Btree *p = pCur->pBtree;
  BtShared *pBt = p->pBt;

  assert( cursorHoldsMutex(pCur) );
  assert( pPage->isInit );
  if( pBt->inTransaction!=TRANS_WRITE ){
    /* Must start a transaction before doing a delete */
    rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
    return rc;
  }
  assert( !pBt->readOnly );
  if( pCur->eState==CURSOR_FAULT ){







|







5972
5973
5974
5975
5976
5977
5978
5979
5980
5981
5982
5983
5984
5985
5986
  unsigned char *pCell;
  int rc;
  Pgno pgnoChild = 0;
  Btree *p = pCur->pBtree;
  BtShared *pBt = p->pBt;

  assert( cursorHoldsMutex(pCur) );
  assert( pPage->isInit==PAGE_ISINIT_FULL );
  if( pBt->inTransaction!=TRANS_WRITE ){
    /* Must start a transaction before doing a delete */
    rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
    return rc;
  }
  assert( !pBt->readOnly );
  if( pCur->eState==CURSOR_FAULT ){
7120
7121
7122
7123
7124
7125
7126



7127

7128
7129
7130
7131
7132
7133
7134
          }

          memcpy(zTo, zFrom, nCopy);
	  sqlite3PagerUnref(pFromPage);
        }
      }




      if( pToPage ) sqlite3PagerUnref(pToPage);

    }
  }

  /* If things have worked so far, the database file may need to be 
  ** truncated. The complex part is that it may need to be truncated to
  ** a size that is not an integer multiple of nToPageSize - the current
  ** page size used by the pager associated with B-Tree pTo.







>
>
>
|
>







7152
7153
7154
7155
7156
7157
7158
7159
7160
7161
7162
7163
7164
7165
7166
7167
7168
7169
7170
          }

          memcpy(zTo, zFrom, nCopy);
	  sqlite3PagerUnref(pFromPage);
        }
      }

      if( pToPage ){
        MemPage *p = (MemPage *)sqlite3PagerGetExtra(pToPage);
        p->isInit = 0;
        sqlite3PagerUnref(pToPage);
      }
    }
  }

  /* If things have worked so far, the database file may need to be 
  ** truncated. The complex part is that it may need to be truncated to
  ** a size that is not an integer multiple of nToPageSize - the current
  ** page size used by the pager associated with B-Tree pTo.
Changes to src/btreeInt.h.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btreeInt.h,v 1.30 2008/08/01 20:10:08 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.











|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btreeInt.h,v 1.31 2008/09/18 17:34:44 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
292
293
294
295
296
297
298












299
300
301
302
303
304
305
  BtShared *pBt;       /* Pointer to BtShared that this page is part of */
  u8 *aData;           /* Pointer to disk image of the page data */
  DbPage *pDbPage;     /* Pager page handle */
  Pgno pgno;           /* Page number for this page */
  MemPage *pParent;    /* The parent of this page.  NULL for root */
};













/*
** The in-memory image of a disk page has the auxiliary information appended
** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
** that extra information.
*/
#define EXTRA_SIZE sizeof(MemPage)








>
>
>
>
>
>
>
>
>
>
>
>







292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
  BtShared *pBt;       /* Pointer to BtShared that this page is part of */
  u8 *aData;           /* Pointer to disk image of the page data */
  DbPage *pDbPage;     /* Pager page handle */
  Pgno pgno;           /* Page number for this page */
  MemPage *pParent;    /* The parent of this page.  NULL for root */
};

/*
** Possible values for the MemPage.isInit variable. When a page is first
** loaded or if the data stored in the MemPage struct is invalidated, 
** MemPage.isInit is set to PAGE_ISINIT_NONE. If the MemPage structure
** is fully initialized, then MemPage.isInit is set to PAGE_ISINIT_FULL.
** MemPage.isInit is set to PAGE_ISINIT_DATA when the MemPage struct is
** populated, but the MemPage.pParent variable is not necessarily correct.
*/
#define PAGE_ISINIT_NONE 0
#define PAGE_ISINIT_DATA 1
#define PAGE_ISINIT_FULL 2

/*
** The in-memory image of a disk page has the auxiliary information appended
** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
** that extra information.
*/
#define EXTRA_SIZE sizeof(MemPage)

Changes to src/pager.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.491 2008/09/17 20:06:26 drh Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"

/*
** Macros for troubleshooting.  Normally turned off
*/







|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.492 2008/09/18 17:34:44 danielk1977 Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"

/*
** Macros for troubleshooting.  Normally turned off
*/
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
  i64 stmtCksum;              /* cksumInit when statement was started */
  i64 stmtJSize;              /* Size of journal at stmt_begin() */
  int sectorSize;             /* Assumed sector size during rollback */
#ifdef SQLITE_TEST
  int nHit, nMiss;            /* Cache hits and missing */
  int nRead, nWrite;          /* Database pages read/written */
#endif
  void (*xDestructor)(DbPage*,int); /* Call this routine when freeing pages */
  void (*xReiniter)(DbPage*,int);   /* Call this routine when reloading pages */
#ifdef SQLITE_HAS_CODEC
  void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void *pCodecArg;            /* First argument to xCodec() */
#endif
  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
  char dbFileVers[16];        /* Changes whenever database file changes */
  i64 journalSizeLimit;       /* Size limit for persistent journal files */







<
|







204
205
206
207
208
209
210

211
212
213
214
215
216
217
218
  i64 stmtCksum;              /* cksumInit when statement was started */
  i64 stmtJSize;              /* Size of journal at stmt_begin() */
  int sectorSize;             /* Assumed sector size during rollback */
#ifdef SQLITE_TEST
  int nHit, nMiss;            /* Cache hits and missing */
  int nRead, nWrite;          /* Database pages read/written */
#endif

  void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
#ifdef SQLITE_HAS_CODEC
  void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void *pCodecArg;            /* First argument to xCodec() */
#endif
  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
  char dbFileVers[16];        /* Changes whenever database file changes */
  i64 journalSizeLimit;       /* Size limit for persistent journal files */
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
    ** of an internal error resulting in an automatic call to
    ** sqlite3PagerRollback().
    */
    void *pData;
    pData = pPg->pData;
    memcpy(pData, aData, pPager->pageSize);
    if( pPager->xReiniter ){
      pPager->xReiniter(pPg, pPager->pageSize);
    }
    if( isMainJrnl ) makeClean(pPg);
#ifdef SQLITE_CHECK_PAGES
    pPg->pageHash = pager_pagehash(pPg);
#endif
    /* If this was page 1, then restore the value of Pager.dbFileVers.
    ** Do this before any decoding. */







|







1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
    ** of an internal error resulting in an automatic call to
    ** sqlite3PagerRollback().
    */
    void *pData;
    pData = pPg->pData;
    memcpy(pData, aData, pPager->pageSize);
    if( pPager->xReiniter ){
      pPager->xReiniter(pPg);
    }
    if( isMainJrnl ) makeClean(pPg);
#ifdef SQLITE_CHECK_PAGES
    pPg->pageHash = pager_pagehash(pPg);
#endif
    /* If this was page 1, then restore the value of Pager.dbFileVers.
    ** Do this before any decoding. */
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
/*
** Set the reinitializer for this pager.  If not NULL, the reinitializer
** is called when the content of a page in cache is restored to its original
** value as a result of a rollback.  The callback gives higher-level code
** an opportunity to restore the EXTRA section to agree with the restored
** page data.
*/
void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*,int)){
  pPager->xReiniter = xReinit;
}

/*
** Set the page size to *pPageSize. If the suggest new page size is
** inappropriate, then an alternative page size is set to that
** value before returning.







|







1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
/*
** Set the reinitializer for this pager.  If not NULL, the reinitializer
** is called when the content of a page in cache is restored to its original
** value as a result of a rollback.  The callback gives higher-level code
** an opportunity to restore the EXTRA section to agree with the restored
** page data.
*/
void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*)){
  pPager->xReiniter = xReinit;
}

/*
** Set the page size to *pPageSize. If the suggest new page size is
** inappropriate, then an alternative page size is set to that
** value before returning.
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
** codes are returned for all these occasions.  Otherwise,
** SQLITE_OK is returned.
*/
int sqlite3PagerRollback(Pager *pPager){
  int rc = SQLITE_OK;
  PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager));
  if( MEMDB ){
    sqlite3PcacheRollback(pPager->pPCache, 1);
    sqlite3PcacheRollback(pPager->pPCache, 0);
    sqlite3PcacheCleanAll(pPager->pPCache);
    sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
    pPager->dbSize = pPager->origDbSize;
    pager_truncate_cache(pPager);
    pPager->stmtInUse = 0;
    pPager->state = PAGER_SHARED;
  }else if( !pPager->dirtyCache || !pPager->journalOpen ){







|
|







3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
** codes are returned for all these occasions.  Otherwise,
** SQLITE_OK is returned.
*/
int sqlite3PagerRollback(Pager *pPager){
  int rc = SQLITE_OK;
  PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager));
  if( MEMDB ){
    sqlite3PcacheRollback(pPager->pPCache, 1, pPager->xReiniter);
    sqlite3PcacheRollback(pPager->pPCache, 0, pPager->xReiniter);
    sqlite3PcacheCleanAll(pPager->pPCache);
    sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
    pPager->dbSize = pPager->origDbSize;
    pager_truncate_cache(pPager);
    pPager->stmtInUse = 0;
    pPager->state = PAGER_SHARED;
  }else if( !pPager->dirtyCache || !pPager->journalOpen ){
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
** Rollback a statement.
*/
int sqlite3PagerStmtRollback(Pager *pPager){
  int rc;
  if( pPager->stmtInUse ){
    PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
    if( MEMDB ){
      sqlite3PcacheRollback(pPager->pPCache, 1);
      pPager->dbSize = pPager->stmtSize;
      pager_truncate_cache(pPager);
      rc = SQLITE_OK;
    }else{
      rc = pager_stmt_playback(pPager);
    }
    sqlite3PagerStmtCommit(pPager);







|







3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
** Rollback a statement.
*/
int sqlite3PagerStmtRollback(Pager *pPager){
  int rc;
  if( pPager->stmtInUse ){
    PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
    if( MEMDB ){
      sqlite3PcacheRollback(pPager->pPCache, 1, pPager->xReiniter);
      pPager->dbSize = pPager->stmtSize;
      pager_truncate_cache(pPager);
      rc = SQLITE_OK;
    }else{
      rc = pager_stmt_playback(pPager);
    }
    sqlite3PagerStmtCommit(pPager);
Changes to src/pager.h.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.82 2008/09/01 17:23:29 drh Exp $
*/

#ifndef _PAGER_H_
#define _PAGER_H_

/*
** If defined as non-zero, auto-vacuum is enabled by default. Otherwise







|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.83 2008/09/18 17:34:44 danielk1977 Exp $
*/

#ifndef _PAGER_H_
#define _PAGER_H_

/*
** If defined as non-zero, auto-vacuum is enabled by default. Otherwise
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

/*
** See source code comments for a detailed description of the following
** routines:
*/
int sqlite3PagerOpen(sqlite3_vfs *, Pager **ppPager, const char*, void(*)(DbPage*), int,int,int);
void sqlite3PagerSetBusyhandler(Pager*, BusyHandler *pBusyHandler);
void sqlite3PagerSetReiniter(Pager*, void(*)(DbPage*,int));
int sqlite3PagerSetPagesize(Pager*, u16*);
int sqlite3PagerMaxPageCount(Pager*, int);
int sqlite3PagerReadFileheader(Pager*, int, unsigned char*);
void sqlite3PagerSetCachesize(Pager*, int);
int sqlite3PagerClose(Pager *pPager);
int sqlite3PagerAcquire(Pager *pPager, Pgno pgno, DbPage **ppPage, int clrFlag);
#define sqlite3PagerGet(A,B,C) sqlite3PagerAcquire(A,B,C,0)







|







68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

/*
** See source code comments for a detailed description of the following
** routines:
*/
int sqlite3PagerOpen(sqlite3_vfs *, Pager **ppPager, const char*, void(*)(DbPage*), int,int,int);
void sqlite3PagerSetBusyhandler(Pager*, BusyHandler *pBusyHandler);
void sqlite3PagerSetReiniter(Pager*, void(*)(DbPage*));
int sqlite3PagerSetPagesize(Pager*, u16*);
int sqlite3PagerMaxPageCount(Pager*, int);
int sqlite3PagerReadFileheader(Pager*, int, unsigned char*);
void sqlite3PagerSetCachesize(Pager*, int);
int sqlite3PagerClose(Pager *pPager);
int sqlite3PagerAcquire(Pager *pPager, Pgno pgno, DbPage **ppPage, int clrFlag);
#define sqlite3PagerGet(A,B,C) sqlite3PagerAcquire(A,B,C,0)
Changes to src/pcache.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
** 2008 August 05
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements that page cache.
**
** @(#) $Id: pcache.c,v 1.29 2008/09/17 20:06:26 drh Exp $
*/
#include "sqliteInt.h"

/*
** A complete page cache is an instance of this structure.
**
** A cache may only be deleted by its owner and while holding the













|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
** 2008 August 05
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements that page cache.
**
** @(#) $Id: pcache.c,v 1.30 2008/09/18 17:34:44 danielk1977 Exp $
*/
#include "sqliteInt.h"

/*
** A complete page cache is an instance of this structure.
**
** A cache may only be deleted by its owner and while holding the
1015
1016
1017
1018
1019
1020
1021
1022




1023
1024
1025
1026
1027
1028
1029
1030
1031
1032



1033
1034
1035
1036
1037
1038
1039
  }
  pcacheExitMutex();
}

/*
** Rollback a change previously preserved.
*/
void sqlite3PcacheRollback(PCache *pCache, int idJournal){




  PgHdr *p;
  int sz;
  int mask = idJournal==0 ? ~PGHDR_IN_JOURNAL : 0xffffff;
  pcacheEnterMutex();     /* Mutex is required to call pcacheFree() */
  sz = pCache->szPage;
  for(p=pCache->pDirty; p; p=p->pNext){
    if( p->apSave[idJournal] ){
      memcpy(p->pData, p->apSave[idJournal], sz);
      pcacheFree(p->apSave[idJournal]);
      p->apSave[idJournal] = 0;



    }
    p->flags &= mask;
  }
  pcacheExitMutex();
}

#ifndef NDEBUG







|
>
>
>
>










>
>
>







1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
  }
  pcacheExitMutex();
}

/*
** Rollback a change previously preserved.
*/
void sqlite3PcacheRollback(
  PCache *pCache,                  /* Pager cache */
  int idJournal,                   /* Which copy to rollback to */
  void (*xReiniter)(PgHdr*)        /* Called on each rolled back page */
){
  PgHdr *p;
  int sz;
  int mask = idJournal==0 ? ~PGHDR_IN_JOURNAL : 0xffffff;
  pcacheEnterMutex();     /* Mutex is required to call pcacheFree() */
  sz = pCache->szPage;
  for(p=pCache->pDirty; p; p=p->pNext){
    if( p->apSave[idJournal] ){
      memcpy(p->pData, p->apSave[idJournal], sz);
      pcacheFree(p->apSave[idJournal]);
      p->apSave[idJournal] = 0;
      if( xReiniter ){
        xReiniter(p);
      }
    }
    p->flags &= mask;
  }
  pcacheExitMutex();
}

#ifndef NDEBUG
Changes to src/pcache.h.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem. 
**
** @(#) $Id: pcache.h,v 1.10 2008/09/17 20:06:26 drh Exp $
*/

#ifndef _PCACHE_H_

typedef struct PgHdr PgHdr;
typedef struct PCache PCache;








|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem. 
**
** @(#) $Id: pcache.h,v 1.11 2008/09/18 17:34:44 danielk1977 Exp $
*/

#ifndef _PCACHE_H_

typedef struct PgHdr PgHdr;
typedef struct PCache PCache;

109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

/* Remove all pages with pgno>x.  Reset the cache if x==0 */
void sqlite3PcacheTruncate(PCache*, Pgno x);

/* Routines used to implement transactions on memory-only databases. */
int sqlite3PcachePreserve(PgHdr*, int);    /* Preserve current page content */
void sqlite3PcacheCommit(PCache*, int);    /* Drop preserved copy */
void sqlite3PcacheRollback(PCache*, int);  /* Rollback to preserved copy */

/* Get a list of all dirty pages in the cache, sorted by page number */
PgHdr *sqlite3PcacheDirtyList(PCache*);

/* Reset and close the cache object */
void sqlite3PcacheClose(PCache*);








|







109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

/* Remove all pages with pgno>x.  Reset the cache if x==0 */
void sqlite3PcacheTruncate(PCache*, Pgno x);

/* Routines used to implement transactions on memory-only databases. */
int sqlite3PcachePreserve(PgHdr*, int);    /* Preserve current page content */
void sqlite3PcacheCommit(PCache*, int);    /* Drop preserved copy */
void sqlite3PcacheRollback(PCache*, int, void (*xReiniter)(PgHdr*));

/* Get a list of all dirty pages in the cache, sorted by page number */
PgHdr *sqlite3PcacheDirtyList(PCache*);

/* Reset and close the cache object */
void sqlite3PcacheClose(PCache*);