/ Check-in [ef9fbc08]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Ensure that when the number of cells on a page drops to zero that the freelist and fragment counter are both cleared. Also add evidence marks corresponding to file-format documentation.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:ef9fbc08b0a047042deeb2d6007d67028fefb9e2
User & Date: drh 2014-11-20 15:30:50
Context
2014-11-20
19:22
Add requirements marks on the built-in collating functions. check-in: 4b608b62 user: drh tags: trunk
15:30
Ensure that when the number of cells on a page drops to zero that the freelist and fragment counter are both cleared. Also add evidence marks corresponding to file-format documentation. check-in: ef9fbc08 user: drh tags: trunk
02:58
Fix the encoding of some integers to use the minimum amount of space: -128, -32768, -8388608, -217483648, and -140737488355328. check-in: 2d7c8da5 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/btree.c.

1135
1136
1137
1138
1139
1140
1141





1142
1143
1144
1145
1146
1147
1148
....
1258
1259
1260
1261
1262
1263
1264


1265
1266
1267
1268
1269
1270
1271
....
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328

1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
....
1493
1494
1495
1496
1497
1498
1499






1500
1501
1502
1503
1504
1505






1506
1507
1508
1509
1510
1511


1512
1513
1514
1515
1516
1517
1518
....
1544
1545
1546
1547
1548
1549
1550


1551
1552
1553
1554
1555
1556
1557
1558



1559


1560
1561
1562
1563
1564
1565





1566
1567
1568
1569
1570
1571
1572
....
1592
1593
1594
1595
1596
1597
1598
1599



1600
1601
1602
1603
1604



1605

1606
1607
1608
1609
1610
1611
1612
....
5955
5956
5957
5958
5959
5960
5961







5962
5963
5964

5965
5966
5967
5968
5969
5970
5971
....
8605
8606
8607
8608
8609
8610
8611


8612


8613


8614
8615
8616
8617
8618
8619
8620
....
8622
8623
8624
8625
8626
8627
8628



8629
8630
8631
8632
8633
8634
8635




8636


8637
8638
8639
8640
8641
8642
8643
8644
8645
8646
8647
8648
8649





8650
8651
8652
8653
8654
8655
8656


/*
** Defragment the page given.  All Cells are moved to the
** end of the page and all free space is collected into one
** big FreeBlk that occurs in between the header and cell
** pointer array and the cell content area.





*/
static int defragmentPage(MemPage *pPage){
  int i;                     /* Loop counter */
  int pc;                    /* Address of the i-th cell */
  int hdr;                   /* Offset to the page header */
  int size;                  /* Size of a cell */
  int usableSize;            /* Number of usable bytes on a page */
................................................................................
    ** in bytes, including the 4-byte header. */
    size = get2byte(&aData[pc+2]);
    if( size>=nByte ){
      int x = size - nByte;
      testcase( x==4 );
      testcase( x==3 );
      if( x<4 ){


        if( aData[hdr+7]>=60 ){
          if( pbDefrag ) *pbDefrag = 1;
          return 0;
        }
        /* Remove the slot from the free-list. Update the number of
        ** fragmented bytes within the page. */
        memcpy(&aData[iAddr], &aData[pc], 2);
................................................................................
  assert( pPage->nFree>=nByte );
  assert( pPage->nOverflow==0 );
  assert( nByte < (int)(pPage->pBt->usableSize-8) );

  assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf );
  gap = pPage->cellOffset + 2*pPage->nCell;
  assert( gap<=65536 );
  top = get2byte(&data[hdr+5]);
  if( gap>top ){
    if( top==0 ){
      /* EVIDENCE-OF: R-29356-02391 If the database uses a 65536-byte page size
      ** and the reserved space is zero (the usual value for reserved space)
      ** then the cell content offset of an empty page wants to be 65536.
      ** However, that integer is too large to be stored in a 2-byte unsigned
      ** integer, so a value of 0 is used in its place. */
      top = 65536;
    }else{

      return SQLITE_CORRUPT_BKPT;
    }
  }

  /* If there is enough space between gap and top for one more cell pointer
  ** array entry offset, and if the freelist is not empty, then search the
  ** freelist looking for a free slot big enough to satisfy the request.
  */
  testcase( gap+2==top );
  testcase( gap+1==top );
................................................................................
  assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  pPage->leaf = (u8)(flagByte>>3);  assert( PTF_LEAF == 1<<3 );
  flagByte &= ~PTF_LEAF;
  pPage->childPtrSize = 4-4*pPage->leaf;
  pBt = pPage->pBt;
  if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){






    pPage->intKey = 1;
    pPage->intKeyLeaf = pPage->leaf;
    pPage->noPayload = !pPage->leaf;
    pPage->maxLocal = pBt->maxLeaf;
    pPage->minLocal = pBt->minLeaf;
  }else if( flagByte==PTF_ZERODATA ){






    pPage->intKey = 0;
    pPage->intKeyLeaf = 0;
    pPage->noPayload = 0;
    pPage->maxLocal = pBt->maxLocal;
    pPage->minLocal = pBt->minLocal;
  }else{


    return SQLITE_CORRUPT_BKPT;
  }
  pPage->max1bytePayload = pBt->max1bytePayload;
  return SQLITE_OK;
}

/*
................................................................................
    int iCellFirst;    /* First allowable cell or freeblock offset */
    int iCellLast;     /* Last possible cell or freeblock offset */

    pBt = pPage->pBt;

    hdr = pPage->hdrOffset;
    data = pPage->aData;


    if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT;
    assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
    pPage->maskPage = (u16)(pBt->pageSize - 1);
    pPage->nOverflow = 0;
    usableSize = pBt->usableSize;
    pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
    pPage->aDataEnd = &data[usableSize];
    pPage->aCellIdx = &data[cellOffset];



    top = get2byteNotZero(&data[hdr+5]);


    pPage->nCell = get2byte(&data[hdr+3]);
    if( pPage->nCell>MX_CELL(pBt) ){
      /* To many cells for a single page.  The page must be corrupt */
      return SQLITE_CORRUPT_BKPT;
    }
    testcase( pPage->nCell==MX_CELL(pBt) );






    /* A malformed database page might cause us to read past the end
    ** of page when parsing a cell.  
    **
    ** The following block of code checks early to see if a cell extends
    ** past the end of a page boundary and causes SQLITE_CORRUPT to be 
    ** returned if it does.
................................................................................
          return SQLITE_CORRUPT_BKPT;
        }
      }
      if( !pPage->leaf ) iCellLast++;
    }  
#endif

    /* Compute the total free space on the page */



    pc = get2byte(&data[hdr+1]);
    nFree = data[hdr+7] + top;
    while( pc>0 ){
      u16 next, size;
      if( pc<iCellFirst || pc>iCellLast ){



        /* Start of free block is off the page */

        return SQLITE_CORRUPT_BKPT; 
      }
      next = get2byte(&data[pc]);
      size = get2byte(&data[pc+2]);
      if( (next>0 && next<=pc+size+3) || pc+size>usableSize ){
        /* Free blocks must be in ascending order. And the last byte of
        ** the free-block must lie on the database page.  */
................................................................................
  }
  rc = freeSpace(pPage, pc, sz);
  if( rc ){
    *pRC = rc;
    return;
  }
  pPage->nCell--;







  memmove(ptr, ptr+2, 2*(pPage->nCell - idx));
  put2byte(&data[hdr+3], pPage->nCell);
  pPage->nFree += 2;

}

/*
** Insert a new cell on pPage at cell index "i".  pCell points to the
** content of the cell.
**
** If the cell content will fit on the page, then put it there.  If it
................................................................................
  if( hit==0 ){
    pCheck->mallocFailed = 1;
  }else{
    int contentOffset = get2byteNotZero(&data[hdr+5]);
    assert( contentOffset<=usableSize );  /* Enforced by btreeInitPage() */
    memset(hit+contentOffset, 0, usableSize-contentOffset);
    memset(hit, 1, contentOffset);


    nCell = get2byte(&data[hdr+3]);


    cellStart = hdr + 12 - 4*pPage->leaf;


    for(i=0; i<nCell; i++){
      int pc = get2byte(&data[cellStart+i*2]);
      u32 size = 65536;
      int j;
      if( pc<=usableSize-4 ){
        size = cellSizePtr(pPage, &data[pc]);
      }
................................................................................
        pCheck->zPfx = 0;
        checkAppendMsg(pCheck,
            "Corruption detected in cell %d on page %d",i,iPage);
      }else{
        for(j=pc+size-1; j>=pc; j--) hit[j]++;
      }
    }



    i = get2byte(&data[hdr+1]);
    while( i>0 ){
      int size, j;
      assert( i<=usableSize-4 );     /* Enforced by btreeInitPage() */
      size = get2byte(&data[i+2]);
      assert( i+size<=usableSize );  /* Enforced by btreeInitPage() */
      for(j=i+size-1; j>=i; j--) hit[j]++;




      j = get2byte(&data[i]);


      assert( j==0 || j>i+size );  /* Enforced by btreeInitPage() */
      assert( j<=usableSize-4 );   /* Enforced by btreeInitPage() */
      i = j;
    }
    for(i=cnt=0; i<usableSize; i++){
      if( hit[i]==0 ){
        cnt++;
      }else if( hit[i]>1 ){
        checkAppendMsg(pCheck,
          "Multiple uses for byte %d of page %d", i, iPage);
        break;
      }
    }





    if( cnt!=data[hdr+7] ){
      checkAppendMsg(pCheck,
          "Fragmentation of %d bytes reported as %d on page %d",
          cnt, data[hdr+7], iPage);
    }
  }
  sqlite3PageFree(hit);







>
>
>
>
>







 







>
>







 







<
<
<
|
|
|
|
|
<
<
>
|
<
<







 







>
>
>
>
>
>






>
>
>
>
>
>






>
>







 







>
>





|


>
>
>

>
>






>
>
>
>
>







 







|
>
>
>

|



>
>
>
|
>







 







>
>
>
>
>
>
>
|
|
|
>







 







>
>

>
>

>
>







 







>
>
>







>
>
>
>

>
>













>
>
>
>
>







1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
....
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
....
1319
1320
1321
1322
1323
1324
1325



1326
1327
1328
1329
1330


1331
1332


1333
1334
1335
1336
1337
1338
1339
....
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
....
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
....
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
....
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
6005
6006
6007
6008
6009
6010
6011
6012
6013
....
8647
8648
8649
8650
8651
8652
8653
8654
8655
8656
8657
8658
8659
8660
8661
8662
8663
8664
8665
8666
8667
8668
....
8670
8671
8672
8673
8674
8675
8676
8677
8678
8679
8680
8681
8682
8683
8684
8685
8686
8687
8688
8689
8690
8691
8692
8693
8694
8695
8696
8697
8698
8699
8700
8701
8702
8703
8704
8705
8706
8707
8708
8709
8710
8711
8712
8713
8714
8715
8716
8717
8718


/*
** Defragment the page given.  All Cells are moved to the
** end of the page and all free space is collected into one
** big FreeBlk that occurs in between the header and cell
** pointer array and the cell content area.
**
** EVIDENCE-OF: R-44582-60138 SQLite may from time to time reorganize a
** b-tree page so that there are no freeblocks or fragment bytes, all
** unused bytes are contained in the unallocated space region, and all
** cells are packed tightly at the end of the page.
*/
static int defragmentPage(MemPage *pPage){
  int i;                     /* Loop counter */
  int pc;                    /* Address of the i-th cell */
  int hdr;                   /* Offset to the page header */
  int size;                  /* Size of a cell */
  int usableSize;            /* Number of usable bytes on a page */
................................................................................
    ** in bytes, including the 4-byte header. */
    size = get2byte(&aData[pc+2]);
    if( size>=nByte ){
      int x = size - nByte;
      testcase( x==4 );
      testcase( x==3 );
      if( x<4 ){
        /* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total
        ** number of bytes in fragments may not exceed 60. */
        if( aData[hdr+7]>=60 ){
          if( pbDefrag ) *pbDefrag = 1;
          return 0;
        }
        /* Remove the slot from the free-list. Update the number of
        ** fragmented bytes within the page. */
        memcpy(&aData[iAddr], &aData[pc], 2);
................................................................................
  assert( pPage->nFree>=nByte );
  assert( pPage->nOverflow==0 );
  assert( nByte < (int)(pPage->pBt->usableSize-8) );

  assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf );
  gap = pPage->cellOffset + 2*pPage->nCell;
  assert( gap<=65536 );



  /* EVIDENCE-OF: R-29356-02391 If the database uses a 65536-byte page size
  ** and the reserved space is zero (the usual value for reserved space)
  ** then the cell content offset of an empty page wants to be 65536.
  ** However, that integer is too large to be stored in a 2-byte unsigned
  ** integer, so a value of 0 is used in its place. */


  top = get2byteNotZero(&data[hdr+5]);
  if( gap>top ) return SQLITE_CORRUPT_BKPT;



  /* If there is enough space between gap and top for one more cell pointer
  ** array entry offset, and if the freelist is not empty, then search the
  ** freelist looking for a free slot big enough to satisfy the request.
  */
  testcase( gap+2==top );
  testcase( gap+1==top );
................................................................................
  assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  pPage->leaf = (u8)(flagByte>>3);  assert( PTF_LEAF == 1<<3 );
  flagByte &= ~PTF_LEAF;
  pPage->childPtrSize = 4-4*pPage->leaf;
  pBt = pPage->pBt;
  if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){
    /* EVIDENCE-OF: R-03640-13415 A value of 5 means the page is an interior
    ** table b-tree page. */
    assert( (PTF_LEAFDATA|PTF_INTKEY)==5 );
    /* EVIDENCE-OF: R-20501-61796 A value of 13 means the page is a leaf
    ** table b-tree page. */
    assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 );
    pPage->intKey = 1;
    pPage->intKeyLeaf = pPage->leaf;
    pPage->noPayload = !pPage->leaf;
    pPage->maxLocal = pBt->maxLeaf;
    pPage->minLocal = pBt->minLeaf;
  }else if( flagByte==PTF_ZERODATA ){
    /* EVIDENCE-OF: R-27225-53936 A value of 2 means the page is an interior
    ** index b-tree page. */
    assert( (PTF_ZERODATA)==2 );
    /* EVIDENCE-OF: R-16571-11615 A value of 10 means the page is a leaf
    ** index b-tree page. */
    assert( (PTF_ZERODATA|PTF_LEAF)==10 );
    pPage->intKey = 0;
    pPage->intKeyLeaf = 0;
    pPage->noPayload = 0;
    pPage->maxLocal = pBt->maxLocal;
    pPage->minLocal = pBt->minLocal;
  }else{
    /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is
    ** an error. */
    return SQLITE_CORRUPT_BKPT;
  }
  pPage->max1bytePayload = pBt->max1bytePayload;
  return SQLITE_OK;
}

/*
................................................................................
    int iCellFirst;    /* First allowable cell or freeblock offset */
    int iCellLast;     /* Last possible cell or freeblock offset */

    pBt = pPage->pBt;

    hdr = pPage->hdrOffset;
    data = pPage->aData;
    /* EVIDENCE-OF: R-28594-02890 The one-byte flag at offset 0 indicating
    ** the b-tree page type. */
    if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT;
    assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
    pPage->maskPage = (u16)(pBt->pageSize - 1);
    pPage->nOverflow = 0;
    usableSize = pBt->usableSize;
    pPage->cellOffset = cellOffset = hdr + 8 + pPage->childPtrSize;
    pPage->aDataEnd = &data[usableSize];
    pPage->aCellIdx = &data[cellOffset];
    /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates
    ** the start of the cell content area. A zero value for this integer is
    ** interpreted as 65536. */
    top = get2byteNotZero(&data[hdr+5]);
    /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
    ** number of cells on the page. */
    pPage->nCell = get2byte(&data[hdr+3]);
    if( pPage->nCell>MX_CELL(pBt) ){
      /* To many cells for a single page.  The page must be corrupt */
      return SQLITE_CORRUPT_BKPT;
    }
    testcase( pPage->nCell==MX_CELL(pBt) );
    /* EVIDENCE-OF: R-24089-57979 If a page contains no cells (which is only
    ** possible for a root page of a table that contains no rows) then the
    ** offset to the cell content area will equal the page size minus the
    ** bytes of reserved space. */
    assert( pPage->nCell>0 || top==usableSize || CORRUPT_DB );

    /* A malformed database page might cause us to read past the end
    ** of page when parsing a cell.  
    **
    ** The following block of code checks early to see if a cell extends
    ** past the end of a page boundary and causes SQLITE_CORRUPT to be 
    ** returned if it does.
................................................................................
          return SQLITE_CORRUPT_BKPT;
        }
      }
      if( !pPage->leaf ) iCellLast++;
    }  
#endif

    /* Compute the total free space on the page
    ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the
    ** start of the first freeblock on the page, or is zero if there are no
    ** freeblocks. */
    pc = get2byte(&data[hdr+1]);
    nFree = data[hdr+7] + top;  /* Init nFree to non-freeblock free space */
    while( pc>0 ){
      u16 next, size;
      if( pc<iCellFirst || pc>iCellLast ){
        /* EVIDENCE-OF: R-55530-52930 In a well-formed b-tree page, there will
        ** always be at least one cell before the first freeblock.
        **
        ** Or, the freeblock is off the end of the page
        */
        return SQLITE_CORRUPT_BKPT; 
      }
      next = get2byte(&data[pc]);
      size = get2byte(&data[pc+2]);
      if( (next>0 && next<=pc+size+3) || pc+size>usableSize ){
        /* Free blocks must be in ascending order. And the last byte of
        ** the free-block must lie on the database page.  */
................................................................................
  }
  rc = freeSpace(pPage, pc, sz);
  if( rc ){
    *pRC = rc;
    return;
  }
  pPage->nCell--;
  if( pPage->nCell==0 ){
    memset(&data[hdr+1], 0, 4);
    data[hdr+7] = 0;
    put2byte(&data[hdr+5], pPage->pBt->usableSize);
    pPage->nFree = pPage->pBt->usableSize - pPage->hdrOffset
                       - pPage->childPtrSize - 8;
  }else{
    memmove(ptr, ptr+2, 2*(pPage->nCell - idx));
    put2byte(&data[hdr+3], pPage->nCell);
    pPage->nFree += 2;
  }
}

/*
** Insert a new cell on pPage at cell index "i".  pCell points to the
** content of the cell.
**
** If the cell content will fit on the page, then put it there.  If it
................................................................................
  if( hit==0 ){
    pCheck->mallocFailed = 1;
  }else{
    int contentOffset = get2byteNotZero(&data[hdr+5]);
    assert( contentOffset<=usableSize );  /* Enforced by btreeInitPage() */
    memset(hit+contentOffset, 0, usableSize-contentOffset);
    memset(hit, 1, contentOffset);
    /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
    ** number of cells on the page. */
    nCell = get2byte(&data[hdr+3]);
    /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page
    ** immediately follows the b-tree page header. */
    cellStart = hdr + 12 - 4*pPage->leaf;
    /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte
    ** integer offsets to the cell contents. */
    for(i=0; i<nCell; i++){
      int pc = get2byte(&data[cellStart+i*2]);
      u32 size = 65536;
      int j;
      if( pc<=usableSize-4 ){
        size = cellSizePtr(pPage, &data[pc]);
      }
................................................................................
        pCheck->zPfx = 0;
        checkAppendMsg(pCheck,
            "Corruption detected in cell %d on page %d",i,iPage);
      }else{
        for(j=pc+size-1; j>=pc; j--) hit[j]++;
      }
    }
    /* EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header
    ** is the offset of the first freeblock, or zero if there are no
    ** freeblocks on the page. */
    i = get2byte(&data[hdr+1]);
    while( i>0 ){
      int size, j;
      assert( i<=usableSize-4 );     /* Enforced by btreeInitPage() */
      size = get2byte(&data[i+2]);
      assert( i+size<=usableSize );  /* Enforced by btreeInitPage() */
      for(j=i+size-1; j>=i; j--) hit[j]++;
      /* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a
      ** big-endian integer which is the offset in the b-tree page of the next
      ** freeblock in the chain, or zero if the freeblock is the last on the
      ** chain. */
      j = get2byte(&data[i]);
      /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of
      ** increasing offset. */
      assert( j==0 || j>i+size );  /* Enforced by btreeInitPage() */
      assert( j<=usableSize-4 );   /* Enforced by btreeInitPage() */
      i = j;
    }
    for(i=cnt=0; i<usableSize; i++){
      if( hit[i]==0 ){
        cnt++;
      }else if( hit[i]>1 ){
        checkAppendMsg(pCheck,
          "Multiple uses for byte %d of page %d", i, iPage);
        break;
      }
    }
    /* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments
    ** is stored in the fifth field of the b-tree page header.
    ** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the
    ** number of fragmented free bytes within the cell content area.
    */
    if( cnt!=data[hdr+7] ){
      checkAppendMsg(pCheck,
          "Fragmentation of %d bytes reported as %d on page %d",
          cnt, data[hdr+7], iPage);
    }
  }
  sqlite3PageFree(hit);