/ Check-in [8bfcda3d]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Make use of built-in bswap32() and bswap16() functions in GCC/Clang for a significant performance improvement there.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 8bfcda3d10aec864d71d12a1248c37e4db6f8899
User & Date: drh 2015-06-30 15:10:29
Context
2015-06-30
17:28
Make use of the _byteswap_ushort() and _byteswap_ulong() compiler intrinsics for MSVC, when available. check-in: fe144dd7 user: mistachkin tags: trunk
16:29
Merge all the latest enhancements from trunk. This merge include FTS5 and a number of notable performance enhancements. check-in: 39936b33 user: drh tags: sessions
15:10
Make use of built-in bswap32() and bswap16() functions in GCC/Clang for a significant performance improvement there. check-in: 8bfcda3d user: drh tags: trunk
14:01
Only use __builtin_bswap16() with GCC 4.8 and later. Closed-Leaf check-in: ce8177e3 user: drh tags: bswap-functions
11:07
Change an unreachable branch into an assert() in sqlite3PagerAcquire() and optimize sqlite3PcacheOpenSavepoint() by factoring out rarely used code into a subroutine. check-in: b406b20e user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Show Whitespace Changes Patch

Changes to src/btree.c.

967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
....
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
....
6621
6622
6623
6624
6625
6626
6627
6628
6629
6630
6631
6632
6633
6634
6635
....
7123
7124
7125
7126
7127
7128
7129
7130
7131
7132
7133
7134
7135
7136
7137
7138
7139
7140
7141
7142
7143
7144
7145
7146
7147
7148
7149
7150
....
9101
9102
9103
9104
9105
9106
9107
9108
9109
9110
9111
9112
9113
9114
9115
**
** findCellPastPtr() does the same except it skips past the initial
** 4-byte child pointer found on interior pages, if there is one.
**
** This routine works only for pages that do not contain overflow cells.
*/
#define findCell(P,I) \
  ((P)->aData + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)])))
#define findCellPastPtr(P,I) \
  ((P)->aDataOfst + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)])))


/*
** This is common tail processing for btreeParseCellPtr() and
** btreeParseCellPtrIndex() for the case when the cell does not fit entirely
** on a single B-tree page.  Make necessary adjustments to the CellInfo
** structure.
................................................................................
    iCellLast = usableSize - 4;
    if( pBt->db->flags & SQLITE_CellSizeCk ){
      int i;            /* Index into the cell pointer array */
      int sz;           /* Size of a cell */

      if( !pPage->leaf ) iCellLast--;
      for(i=0; i<pPage->nCell; i++){
        pc = get2byte(&data[cellOffset+i*2]);
        testcase( pc==iCellFirst );
        testcase( pc==iCellLast );
        if( pc<iCellFirst || pc>iCellLast ){
          return SQLITE_CORRUPT_BKPT;
        }
        sz = pPage->xCellSize(pPage, &data[pc]);
        testcase( pc+sz==usableSize );
................................................................................

  put2byte(&aData[hdr+3], pPg->nCell);
  put2byte(&aData[hdr+5], pData - aData);

#ifdef SQLITE_DEBUG
  for(i=0; i<nNew && !CORRUPT_DB; i++){
    u8 *pCell = pCArray->apCell[i+iNew];
    int iOff = get2byte(&pPg->aCellIdx[i*2]);
    if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){
      pCell = &pTmp[pCell - aData];
    }
    assert( 0==memcmp(pCell, &aData[iOff],
            pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) );
  }
#endif
................................................................................
    ** first.
    */
    memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*limit);
    if( pOld->nOverflow>0 ){
      memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow);
      limit = pOld->aiOvfl[0];
      for(j=0; j<limit; j++){
        b.apCell[b.nCell] = aData + (maskPage & get2byte(piCell));
        piCell += 2;
        b.nCell++;
      }
      for(k=0; k<pOld->nOverflow; k++){
        assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */
        b.apCell[b.nCell] = pOld->apOvfl[k];
        b.nCell++;
      }
    }
    piEnd = aData + pOld->cellOffset + 2*pOld->nCell;
    while( piCell<piEnd ){
      assert( b.nCell<nMaxCells );
      b.apCell[b.nCell] = aData + (maskPage & get2byte(piCell));
      piCell += 2;
      b.nCell++;
    }

    cntOld[i] = b.nCell;
    if( i<nOld-1 && !leafData){
      u16 sz = (u16)szNew[i];
................................................................................
    nCell = get2byte(&data[hdr+3]);
    /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page
    ** immediately follows the b-tree page header. */
    cellStart = hdr + 12 - 4*pPage->leaf;
    /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte
    ** integer offsets to the cell contents. */
    for(i=0; i<nCell; i++){
      int pc = get2byte(&data[cellStart+i*2]);
      u32 size = 65536;
      if( pc<=usableSize-4 ){
        size = pPage->xCellSize(pPage, &data[pc]);
      }
      if( (int)(pc+size-1)>=usableSize ){
        pCheck->zPfx = 0;
        checkAppendMsg(pCheck,







|

|







 







|







 







|







 







|












|







 







|







967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
....
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
....
6621
6622
6623
6624
6625
6626
6627
6628
6629
6630
6631
6632
6633
6634
6635
....
7123
7124
7125
7126
7127
7128
7129
7130
7131
7132
7133
7134
7135
7136
7137
7138
7139
7140
7141
7142
7143
7144
7145
7146
7147
7148
7149
7150
....
9101
9102
9103
9104
9105
9106
9107
9108
9109
9110
9111
9112
9113
9114
9115
**
** findCellPastPtr() does the same except it skips past the initial
** 4-byte child pointer found on interior pages, if there is one.
**
** This routine works only for pages that do not contain overflow cells.
*/
#define findCell(P,I) \
  ((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))
#define findCellPastPtr(P,I) \
  ((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))


/*
** This is common tail processing for btreeParseCellPtr() and
** btreeParseCellPtrIndex() for the case when the cell does not fit entirely
** on a single B-tree page.  Make necessary adjustments to the CellInfo
** structure.
................................................................................
    iCellLast = usableSize - 4;
    if( pBt->db->flags & SQLITE_CellSizeCk ){
      int i;            /* Index into the cell pointer array */
      int sz;           /* Size of a cell */

      if( !pPage->leaf ) iCellLast--;
      for(i=0; i<pPage->nCell; i++){
        pc = get2byteAligned(&data[cellOffset+i*2]);
        testcase( pc==iCellFirst );
        testcase( pc==iCellLast );
        if( pc<iCellFirst || pc>iCellLast ){
          return SQLITE_CORRUPT_BKPT;
        }
        sz = pPage->xCellSize(pPage, &data[pc]);
        testcase( pc+sz==usableSize );
................................................................................

  put2byte(&aData[hdr+3], pPg->nCell);
  put2byte(&aData[hdr+5], pData - aData);

#ifdef SQLITE_DEBUG
  for(i=0; i<nNew && !CORRUPT_DB; i++){
    u8 *pCell = pCArray->apCell[i+iNew];
    int iOff = get2byteAligned(&pPg->aCellIdx[i*2]);
    if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){
      pCell = &pTmp[pCell - aData];
    }
    assert( 0==memcmp(pCell, &aData[iOff],
            pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) );
  }
#endif
................................................................................
    ** first.
    */
    memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*limit);
    if( pOld->nOverflow>0 ){
      memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow);
      limit = pOld->aiOvfl[0];
      for(j=0; j<limit; j++){
        b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell));
        piCell += 2;
        b.nCell++;
      }
      for(k=0; k<pOld->nOverflow; k++){
        assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */
        b.apCell[b.nCell] = pOld->apOvfl[k];
        b.nCell++;
      }
    }
    piEnd = aData + pOld->cellOffset + 2*pOld->nCell;
    while( piCell<piEnd ){
      assert( b.nCell<nMaxCells );
      b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell));
      piCell += 2;
      b.nCell++;
    }

    cntOld[i] = b.nCell;
    if( i<nOld-1 && !leafData){
      u16 sz = (u16)szNew[i];
................................................................................
    nCell = get2byte(&data[hdr+3]);
    /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page
    ** immediately follows the b-tree page header. */
    cellStart = hdr + 12 - 4*pPage->leaf;
    /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte
    ** integer offsets to the cell contents. */
    for(i=0; i<nCell; i++){
      int pc = get2byteAligned(&data[cellStart+i*2]);
      u32 size = 65536;
      if( pc<=usableSize-4 ){
        size = pPage->xCellSize(pPage, &data[pc]);
      }
      if( (int)(pc+size-1)>=usableSize ){
        pCheck->zPfx = 0;
        checkAppendMsg(pCheck,

Changes to src/btreeInt.h.

687
688
689
690
691
692
693













/*
** Routines to read or write a two- and four-byte big-endian integer values.
*/
#define get2byte(x)   ((x)[0]<<8 | (x)[1])
#define put2byte(p,v) ((p)[0] = (u8)((v)>>8), (p)[1] = (u8)(v))
#define get4byte sqlite3Get4byte
#define put4byte sqlite3Put4byte




















>
>
>
>
>
>
>
>
>
>
>
>
>
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
/*
** Routines to read or write a two- and four-byte big-endian integer values.
*/
#define get2byte(x)   ((x)[0]<<8 | (x)[1])
#define put2byte(p,v) ((p)[0] = (u8)((v)>>8), (p)[1] = (u8)(v))
#define get4byte sqlite3Get4byte
#define put4byte sqlite3Put4byte

/*
** get2byteAligned(), unlike get2byte(), requires that its argument point to a
** two-byte aligned address.  get2bytea() is only used for accessing the
** cell addresses in a btree header.
*/
#if SQLITE_BYTEORDER==4321
# define get2byteAligned(x)  (*(u16*)(x))
#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4008000
# define get2byteAligned(x)  __builtin_bswap16(*(u16*)(x))
#else
# define get2byteAligned(x)  ((x)[0]<<8 | (x)[1])
#endif

Changes to src/sqliteInt.h.

55
56
57
58
59
60
61







62
63
64
65
66
67
68
#ifndef SQLITE_DISABLE_LFS
# define _LARGE_FILE       1
# ifndef _FILE_OFFSET_BITS
#   define _FILE_OFFSET_BITS 64
# endif
# define _LARGEFILE_SOURCE 1
#endif








/* Needed for various definitions... */
#if defined(__GNUC__) && !defined(_GNU_SOURCE)
# define _GNU_SOURCE
#endif

#if defined(__OpenBSD__) && !defined(_BSD_SOURCE)







>
>
>
>
>
>
>







55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#ifndef SQLITE_DISABLE_LFS
# define _LARGE_FILE       1
# ifndef _FILE_OFFSET_BITS
#   define _FILE_OFFSET_BITS 64
# endif
# define _LARGEFILE_SOURCE 1
#endif

/* What version of GCC is being used.  0 means GCC is not being used */
#ifdef __GNUC__
# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
#else
# define GCC_VERSION 0
#endif

/* Needed for various definitions... */
#if defined(__GNUC__) && !defined(_GNU_SOURCE)
# define _GNU_SOURCE
#endif

#if defined(__OpenBSD__) && !defined(_BSD_SOURCE)

Changes to src/util.c.

1074
1075
1076
1077
1078
1079
1080









1081
1082

1083
1084






1085
1086
1087
1088

1089
1090
1091
1092
1093
1094
1095
}


/*
** Read or write a four-byte big-endian integer value.
*/
u32 sqlite3Get4byte(const u8 *p){









  testcase( p[0]&0x80 );
  return ((unsigned)p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];

}
void sqlite3Put4byte(unsigned char *p, u32 v){






  p[0] = (u8)(v>>24);
  p[1] = (u8)(v>>16);
  p[2] = (u8)(v>>8);
  p[3] = (u8)v;

}



/*
** Translate a single byte of Hex into an integer.
** This routine only works if h really is a valid hexadecimal







>
>
>
>
>
>
>
>
>


>


>
>
>
>
>
>




>







1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
}


/*
** Read or write a four-byte big-endian integer value.
*/
u32 sqlite3Get4byte(const u8 *p){
#if SQLITE_BYTEORDER==4321
  u32 x;
  memcpy(&x,p,4);
  return x;
#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__)
  u32 x;
  memcpy(&x,p,4);
  return __builtin_bswap32(x);
#else
  testcase( p[0]&0x80 );
  return ((unsigned)p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
#endif
}
void sqlite3Put4byte(unsigned char *p, u32 v){
#if SQLITE_BYTEORDER==4321
  memcpy(p,&v,4);
#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__)
  u32 x = __builtin_bswap32(v);
  memcpy(p,&x,4);
#else
  p[0] = (u8)(v>>24);
  p[1] = (u8)(v>>16);
  p[2] = (u8)(v>>8);
  p[3] = (u8)v;
#endif
}



/*
** Translate a single byte of Hex into an integer.
** This routine only works if h really is a valid hexadecimal