SQLite

Check-in [fec849dcca]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Attempt to further reduce memcpy() in balance_nonroot().
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | defrag-opt
Files: files | file ages | folders
SHA1: fec849dcca3aead2bc2d4ecffeda750684d32fb0
User & Date: dan 2014-10-11 20:00:24.552
Context
2014-10-13
18:03
Further work on balance_nonroot(). (check-in: 6594f9b420 user: dan tags: defrag-opt)
2014-10-11
20:00
Attempt to further reduce memcpy() in balance_nonroot(). (check-in: fec849dcca user: dan tags: defrag-opt)
2014-10-09
19:35
Change the balance_nonroot() routine to reduce the amount of memcpy work that takes place. This is a work in progress. (check-in: 29304499ea user: dan tags: defrag-opt)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/btree.c.
5976
5977
5978
5979
5980
5981
5982
5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
5993
  pPage->nFree -= (nCell*2 + nUsable - cellbody);
  pPage->nCell = (u16)nCell;
}


static void rebuildPage(
  MemPage *pPg,                   /* Edit this page */
  int nRemove,                    /* Cells to remove from start of page */
  int nCell,                      /* Final number of cells on page */
  u8 **apCell,                    /* Array of nCell final cells */
  u16 *szCell                     /* Array of nCell cell sizes */
){
  const int hdr = pPg->hdrOffset;          /* Offset of header on pPg */
  u8 * const aData = pPg->aData;           /* Pointer to data for pPg */
  const int usableSize = pPg->pBt->usableSize;
  u8 * const pEnd = &aData[usableSize];
  int i;
  u8 *pCellptr = pPg->aCellIdx;







<

|
|







5976
5977
5978
5979
5980
5981
5982

5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
  pPage->nFree -= (nCell*2 + nUsable - cellbody);
  pPage->nCell = (u16)nCell;
}


static void rebuildPage(
  MemPage *pPg,                   /* Edit this page */

  int nCell,                      /* Final number of cells on page */
  u8 **apCell,                    /* Array of cells */
  u16 *szCell                     /* Array of cell sizes */
){
  const int hdr = pPg->hdrOffset;          /* Offset of header on pPg */
  u8 * const aData = pPg->aData;           /* Pointer to data for pPg */
  const int usableSize = pPg->pBt->usableSize;
  u8 * const pEnd = &aData[usableSize];
  int i;
  u8 *pCellptr = pPg->aCellIdx;
6015
6016
6017
6018
6019
6020
6021



























































































































6022
6023
6024
6025
6026
6027
6028
  pPg->nOverflow = 0;

  put2byte(&aData[hdr+1], 0);
  put2byte(&aData[hdr+3], pPg->nCell);
  put2byte(&aData[hdr+5], pData - aData);
  aData[hdr+7] = 0x00;
}




























































































































/*
** The following parameters determine how many adjacent pages get involved
** in a balancing operation.  NN is the number of neighbors on either side
** of the page that participate in the balancing operation.  NB is the
** total number of pages that participate, including the target page and
** NN neighbors on either side.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







6014
6015
6016
6017
6018
6019
6020
6021
6022
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059
6060
6061
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
6073
6074
6075
6076
6077
6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101
6102
6103
6104
6105
6106
6107
6108
6109
6110
6111
6112
6113
6114
6115
6116
6117
6118
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130
6131
6132
6133
6134
6135
6136
6137
6138
6139
6140
6141
6142
6143
6144
6145
6146
6147
6148
6149
6150
  pPg->nOverflow = 0;

  put2byte(&aData[hdr+1], 0);
  put2byte(&aData[hdr+3], pPg->nCell);
  put2byte(&aData[hdr+5], pData - aData);
  aData[hdr+7] = 0x00;
}

static void editPage(
  MemPage *pPg,                   /* Edit this page */
  int iOld,                       /* Index of first cell currently on page */
  int iNew,                       /* Index of new first cell on page */
  int nNew,                       /* Final number of cells on page */
  u8 **apCell,                    /* Array of cells */
  u16 *szCell                     /* Array of cell sizes */
){

  if( 1 ){
    u8 * const aData = pPg->aData;
    const int hdr = pPg->hdrOffset;
    u8 *pBegin = &pPg->aCellIdx[nNew * 2];
    int nFree = pPg->nFree;       /* Free bytes on pPg */
    int nCell = pPg->nCell;       /* Cells stored on pPg */
    u8 *pData;
    u8 *pCellptr;
    int i;
    int iOldEnd = iOld + pPg->nCell + pPg->nOverflow;

#ifdef SQLITE_DEBUG
    u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager);
    memcpy(pTmp, aData, pPg->pBt->usableSize);
#endif

    /* Remove cells from the start and end of the page */
    if( iOld<iNew ){
      int nShift = 0;
      for(i=iOld; i<iNew; i++){
        if( apCell[i]>aData && apCell[i]<&aData[pPg->pBt->usableSize] ){
          freeSpace(pPg, apCell[i] - aData, szCell[i]);
          nFree += szCell[i] + 2;
          nShift++;
        }
      }
      nCell -= nShift;
      memmove(pPg->aCellIdx, &pPg->aCellIdx[nShift*2], nCell*2);
    }
    for(i=iNew+nNew; i<iOldEnd; i++){
      if( apCell[i]>aData && apCell[i]<&aData[pPg->pBt->usableSize] ){
        freeSpace(pPg, apCell[i] - aData, szCell[i]);
        nFree += szCell[i] + 2;
        nCell--;
      }
    }
    pData = &aData[get2byte(&aData[hdr+5])];
    if( pData<pBegin ) goto editpage_fail;

    /* Add cells to the start of the page */
    if( iNew<iOld ){
      pCellptr = pPg->aCellIdx;
      memmove(&pCellptr[(iOld-iNew)*2], pCellptr, nCell*2);
      for(i=iNew; i<iOld; i++){
        pData -= szCell[i];
        if( pData<pBegin ) goto editpage_fail;
        memcpy(pData, apCell[i], szCell[i]);
        put2byte(pCellptr, (pData - aData));
        pCellptr += 2;
        nFree -= (szCell[i] + 2);
        nCell++;
      }
    }

    /* Add any overflow cells */
    for(i=0; i<pPg->nOverflow; i++){
      int iCell = (iOld + pPg->aiOvfl[i]) - iNew;
      if( iCell>=0 && iCell<nNew ){
        u8 *pCellptr = &pPg->aCellIdx[iCell * 2];
        int sz = szCell[iCell+iNew];
        memmove(&pCellptr[2], pCellptr, (nCell - iCell) * 2);
        pData -= sz;
        if( pData<pBegin ) goto editpage_fail;
        memcpy(pData, apCell[iCell+iNew], sz);
        put2byte(pCellptr, (pData - aData));
        nFree -= (sz + 2);
        nCell++;
      }
    }

    /* Append cells to the end of the page */
    pCellptr = &pPg->aCellIdx[nCell*2];
    for(i=iNew+nCell; i<(iNew+nNew); i++){
      pData -= szCell[i];
      if( pData<pBegin ) goto editpage_fail;
      memcpy(pData, apCell[i], szCell[i]);
      put2byte(pCellptr, (pData - aData));
      pCellptr += 2;
      nFree -= (szCell[i] + 2);
    }

    pPg->nFree = nFree;
    pPg->nCell = nNew;
    pPg->nOverflow = 0;

    put2byte(&aData[hdr+3], pPg->nCell);
    put2byte(&aData[hdr+5], pData - aData);

#ifdef SQLITE_DEBUG
    for(i=0; i<nNew; i++){
      u8 *pCell = apCell[i+iNew];
      int iOff = get2byte(&pPg->aCellIdx[i*2]);
      if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){
        pCell = &pTmp[pCell - aData];
      }
      assert( 0==memcmp(pCell, &aData[iOff], szCell[i+iNew]) );
    }
#endif

#if 0
  printf("EDIT\n");
#endif

    return;
  }

 editpage_fail:
#if 0
  printf("REBUILD\n");
#endif
  /* Unable to edit this page. Rebuild it from scratch instead. */
  rebuildPage(pPg, nNew, &apCell[iNew], &szCell[iNew]);
}

/*
** The following parameters determine how many adjacent pages get involved
** in a balancing operation.  NN is the number of neighbors on either side
** of the page that participate in the balancing operation.  NB is the
** total number of pages that participate, including the target page and
** NN neighbors on either side.
6308
6309
6310
6311
6312
6313
6314

6315
6316
6317
6318
6319
6320
6321
  int iOvflSpace = 0;          /* First unused byte of aOvflSpace[] */
  int szScratch;               /* Size of scratch memory requested */
  MemPage *apOld[NB];          /* pPage and up to two siblings */
  MemPage *apNew[NB+2];        /* pPage and up to NB siblings after balancing */
  u8 *pRight;                  /* Location in parent of right-sibling pointer */
  u8 *apDiv[NB-1];             /* Divider cells in pParent */
  int cntNew[NB+2];            /* Index in aCell[] of cell after i-th page */

  int szNew[NB+2];             /* Combined size of cells place on i-th page */
  u8 **apCell = 0;             /* All cells begin balanced */
  u16 *szCell;                 /* Local size of all cells in apCell[] */
  u8 *aSpace1;                 /* Space for copies of dividers cells */
  Pgno pgno;                   /* Temp var to store a page number in */

  int aShiftLeft[NB+2];







>







6430
6431
6432
6433
6434
6435
6436
6437
6438
6439
6440
6441
6442
6443
6444
  int iOvflSpace = 0;          /* First unused byte of aOvflSpace[] */
  int szScratch;               /* Size of scratch memory requested */
  MemPage *apOld[NB];          /* pPage and up to two siblings */
  MemPage *apNew[NB+2];        /* pPage and up to NB siblings after balancing */
  u8 *pRight;                  /* Location in parent of right-sibling pointer */
  u8 *apDiv[NB-1];             /* Divider cells in pParent */
  int cntNew[NB+2];            /* Index in aCell[] of cell after i-th page */
  int cntOld[NB+2];            /* Old index in aCell[] after i-th page */
  int szNew[NB+2];             /* Combined size of cells place on i-th page */
  u8 **apCell = 0;             /* All cells begin balanced */
  u16 *szCell;                 /* Local size of all cells in apCell[] */
  u8 *aSpace1;                 /* Space for copies of dividers cells */
  Pgno pgno;                   /* Temp var to store a page number in */

  int aShiftLeft[NB+2];
6483
6484
6485
6486
6487
6488
6489

6490
6491
6492
6493
6494
6495
6496
      for(j=0; j<limit; j++){
        assert( nCell<nMaxCells );
        apCell[nCell] = findCellv2(aData, maskPage, cellOffset, j);
        szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
        nCell++;
      }
    }       

    if( i<nOld-1 && !leafData){
      u16 sz = (u16)szNew[i];
      u8 *pTemp;
      assert( nCell<nMaxCells );
      szCell[nCell] = sz;
      pTemp = &aSpace1[iSpace1];
      iSpace1 += sz;







>







6606
6607
6608
6609
6610
6611
6612
6613
6614
6615
6616
6617
6618
6619
6620
      for(j=0; j<limit; j++){
        assert( nCell<nMaxCells );
        apCell[nCell] = findCellv2(aData, maskPage, cellOffset, j);
        szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
        nCell++;
      }
    }       
    cntOld[i] = nCell;
    if( i<nOld-1 && !leafData){
      u16 sz = (u16)szNew[i];
      u8 *pTemp;
      assert( nCell<nMaxCells );
      szCell[nCell] = sz;
      pTemp = &aSpace1[iSpace1];
      iSpace1 += sz;
6620
6621
6622
6623
6624
6625
6626

6627
6628
6629
6630
6631
6632
6633
    }else{
      assert( i>0 );
      rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0);
      if( rc ) goto balance_cleanup;
      zeroPage(pNew, pageFlags);
      apNew[i] = pNew;
      nNew++;


      /* Set the pointer-map entry for the new sibling page. */
      if( ISAUTOVACUUM ){
        ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc);
        if( rc!=SQLITE_OK ){
          goto balance_cleanup;
        }







>







6744
6745
6746
6747
6748
6749
6750
6751
6752
6753
6754
6755
6756
6757
6758
    }else{
      assert( i>0 );
      rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0);
      if( rc ) goto balance_cleanup;
      zeroPage(pNew, pageFlags);
      apNew[i] = pNew;
      nNew++;
      cntOld[i] = nCell;

      /* Set the pointer-map entry for the new sibling page. */
      if( ISAUTOVACUUM ){
        ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc);
        if( rc!=SQLITE_OK ){
          goto balance_cleanup;
        }
6689
6690
6691
6692
6693
6694
6695



























6696
6697
6698
6699
6700
6701
6702
  for(i=0; i<nNew; i++){
    /* At this point, "j" is the apCell[] index of the first cell currently
    ** stored on page apNew[i]. Or, if apNew[i] was not one of the original 
    ** sibling pages, "j" should be set to nCell. Variable iFirst is set
    ** to the apCell[] index of the first cell that will appear on the
    ** page following this balancing operation.  */
    int iFirst = (i==0 ? 0 : cntNew[i-1] + !leafData);     /* new first cell */



























    assert( i<nOld || j==nCell );
    aShiftLeft[i] = j - iFirst;
    j += apNew[i]->nCell + apNew[i]->nOverflow;
    aShiftRight[i] = cntNew[i] - j;
    assert( i!=nOld-1 || j==nCell );
    if( j<nCell ) j += !leafData;
  }







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







6814
6815
6816
6817
6818
6819
6820
6821
6822
6823
6824
6825
6826
6827
6828
6829
6830
6831
6832
6833
6834
6835
6836
6837
6838
6839
6840
6841
6842
6843
6844
6845
6846
6847
6848
6849
6850
6851
6852
6853
6854
  for(i=0; i<nNew; i++){
    /* At this point, "j" is the apCell[] index of the first cell currently
    ** stored on page apNew[i]. Or, if apNew[i] was not one of the original 
    ** sibling pages, "j" should be set to nCell. Variable iFirst is set
    ** to the apCell[] index of the first cell that will appear on the
    ** page following this balancing operation.  */
    int iFirst = (i==0 ? 0 : cntNew[i-1] + !leafData);     /* new first cell */

#if 0
    MemPage *pNew = apNew[i];
    int iCell;
    int nCta = 0;
    int nFree;

    printf("REBUILD %d: %d@%d -> %d@%d", apNew[i]->pgno,
          pNew->nCell+pNew->nOverflow, j,
          cntNew[i] - iFirst, iFirst
    );
    for(iCell=iFirst; iCell<cntNew[i]; iCell++){
      if( apCell[iCell]<pNew->aData 
       || apCell[iCell]>=&pNew->aData[pBt->usableSize] 
      ){
        nCta += szCell[iCell];
      }
    }
    nFree = get2byte(&pNew->aData[pNew->hdrOffset+5]);
    nFree -= (pNew->cellOffset + (cntNew[i] - iFirst) * 2);
    printf(" cta=%d free=%d\n", nCta, nFree);
    if( i==(nNew-1) ){
      printf("-----\n");
      fflush(stdout);
    }
#endif

    assert( i<nOld || j==nCell );
    aShiftLeft[i] = j - iFirst;
    j += apNew[i]->nCell + apNew[i]->nOverflow;
    aShiftRight[i] = cntNew[i] - j;
    assert( i!=nOld-1 || j==nCell );
    if( j<nCell ) j += !leafData;
  }
6831
6832
6833
6834
6835
6836
6837



6838





6839
6840
6841
6842

6843
6844
6845
6846
6847
6848
6849
6850
6851
6852
6853
6854
6855
  assert( aShiftRight[nNew-1]>=0 && aShiftLeft[0]==0 );
  for(i=0; i<nNew*2; i++){
    int iPg = (i>=nNew ? i-nNew : nNew-1-i);
    if( abDone[iPg]==0 
     && (aShiftLeft[iPg]>=0 || abDone[iPg-1])
     && (aShiftRight[iPg]>=0 || abDone[iPg+1])
    ){



      MemPage *pNew = apNew[iPg];





      int iLeft = ((iPg==0) ? 0 : cntNew[iPg-1] + !leafData);
      rebuildPage(pNew, 
          aShiftLeft[iPg] < 0 ? (aShiftLeft[iPg]*-1) : 0,
          cntNew[iPg] - iLeft,

          &apCell[iLeft],
          &szCell[iLeft]
      );
      abDone[iPg] = 1;
      assert( pNew->nOverflow==0 );
      assert( pNew->nCell==(cntNew[iPg] - (iPg==0?0:cntNew[iPg-1]+!leafData)) );
    }
  }
  assert( memcmp(abDone, "\01\01\01\01\01", nNew)==0 );

  assert( nOld>0 );
  assert( nNew>0 );








>
>
>
|
>
>
>
>
>
|
<
<
|
>
|
|
<

|
|







6983
6984
6985
6986
6987
6988
6989
6990
6991
6992
6993
6994
6995
6996
6997
6998
6999


7000
7001
7002
7003

7004
7005
7006
7007
7008
7009
7010
7011
7012
7013
  assert( aShiftRight[nNew-1]>=0 && aShiftLeft[0]==0 );
  for(i=0; i<nNew*2; i++){
    int iPg = (i>=nNew ? i-nNew : nNew-1-i);
    if( abDone[iPg]==0 
     && (aShiftLeft[iPg]>=0 || abDone[iPg-1])
     && (aShiftRight[iPg]>=0 || abDone[iPg+1])
    ){
      int iNew;
      int iOld;
      int nNewCell;

      if( iPg==0 ){
        iNew = iOld = 0;
        nNewCell = cntNew[0];
      }else{
        iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : nCell;
        iNew = cntNew[iPg-1] + !leafData;


        nNewCell = cntNew[iPg] - iNew;
      }

      editPage(apNew[iPg], iOld, iNew, nNewCell, apCell, szCell);

      abDone[iPg] = 1;
      assert( apNew[iPg]->nOverflow==0 );
      assert( apNew[iPg]->nCell==nNewCell );
    }
  }
  assert( memcmp(abDone, "\01\01\01\01\01", nNew)==0 );

  assert( nOld>0 );
  assert( nNew>0 );

6865
6866
6867
6868
6869
6870
6871

6872
6873
6874
6875
6876
6877
6878
    ** for which the pointer is stored within the content being copied.
    **
    ** The second assert below verifies that the child page is defragmented
    ** (it must be, as it was just reconstructed using assemblePage()). This
    ** is important if the parent page happens to be page 1 of the database
    ** image.  */
    assert( nNew==1 );

    assert( apNew[0]->nFree == 
        (get2byte(&apNew[0]->aData[5])-apNew[0]->cellOffset-apNew[0]->nCell*2) 
    );
    copyNodeContent(apNew[0], pParent, &rc);
    freePage(apNew[0], &rc);
  }else if( ISAUTOVACUUM && !leafCorrection ){
    /* Fix the pointer map entries associated with the right-child of each







>







7023
7024
7025
7026
7027
7028
7029
7030
7031
7032
7033
7034
7035
7036
7037
    ** for which the pointer is stored within the content being copied.
    **
    ** The second assert below verifies that the child page is defragmented
    ** (it must be, as it was just reconstructed using assemblePage()). This
    ** is important if the parent page happens to be page 1 of the database
    ** image.  */
    assert( nNew==1 );
    defragmentPage(apNew[0]);
    assert( apNew[0]->nFree == 
        (get2byte(&apNew[0]->aData[5])-apNew[0]->cellOffset-apNew[0]->nCell*2) 
    );
    copyNodeContent(apNew[0], pParent, &rc);
    freePage(apNew[0], &rc);
  }else if( ISAUTOVACUUM && !leafCorrection ){
    /* Fix the pointer map entries associated with the right-child of each