/ Check-in [8bfcda3d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Make use of built-in bswap32() and bswap16() functions in GCC/Clang for a significant performance improvement there.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 8bfcda3d10aec864d71d12a1248c37e4db6f8899
User & Date: drh 2015-06-30 15:10:29
Context
2015-06-30
17:28
Make use of the _byteswap_ushort() and _byteswap_ulong() compiler intrinsics for MSVC, when available. check-in: fe144dd7 user: mistachkin tags: trunk
16:29
Merge all the latest enhancements from trunk. This merge includes FTS5 and a number of notable performance enhancements. check-in: 39936b33 user: drh tags: sessions
15:10
Make use of built-in bswap32() and bswap16() functions in GCC/Clang for a significant performance improvement there. check-in: 8bfcda3d user: drh tags: trunk
14:01
Only use __builtin_bswap16() with GCC 4.8 and later. Closed-Leaf check-in: ce8177e3 user: drh tags: bswap-functions
11:07
Change an unreachable branch into an assert() in sqlite3PagerAcquire() and optimize sqlite3PcacheOpenSavepoint() by factoring out rarely used code into a subroutine. check-in: b406b20e user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/btree.c.

   967    967   **
   968    968   ** findCellPastPtr() does the same except it skips past the initial
   969    969   ** 4-byte child pointer found on interior pages, if there is one.
   970    970   **
   971    971   ** This routine works only for pages that do not contain overflow cells.
   972    972   */
   973    973   #define findCell(P,I) \
   974         -  ((P)->aData + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)])))
          974  +  ((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))
   975    975   #define findCellPastPtr(P,I) \
   976         -  ((P)->aDataOfst + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)])))
          976  +  ((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))
   977    977   
   978    978   
   979    979   /*
   980    980   ** This is common tail processing for btreeParseCellPtr() and
   981    981   ** btreeParseCellPtrIndex() for the case when the cell does not fit entirely
   982    982   ** on a single B-tree page.  Make necessary adjustments to the CellInfo
   983    983   ** structure.
................................................................................
  1750   1750       iCellLast = usableSize - 4;
  1751   1751       if( pBt->db->flags & SQLITE_CellSizeCk ){
  1752   1752         int i;            /* Index into the cell pointer array */
  1753   1753         int sz;           /* Size of a cell */
  1754   1754   
  1755   1755         if( !pPage->leaf ) iCellLast--;
  1756   1756         for(i=0; i<pPage->nCell; i++){
  1757         -        pc = get2byte(&data[cellOffset+i*2]);
         1757  +        pc = get2byteAligned(&data[cellOffset+i*2]);
  1758   1758           testcase( pc==iCellFirst );
  1759   1759           testcase( pc==iCellLast );
  1760   1760           if( pc<iCellFirst || pc>iCellLast ){
  1761   1761             return SQLITE_CORRUPT_BKPT;
  1762   1762           }
  1763   1763           sz = pPage->xCellSize(pPage, &data[pc]);
  1764   1764           testcase( pc+sz==usableSize );
................................................................................
  6621   6621   
  6622   6622     put2byte(&aData[hdr+3], pPg->nCell);
  6623   6623     put2byte(&aData[hdr+5], pData - aData);
  6624   6624   
  6625   6625   #ifdef SQLITE_DEBUG
  6626   6626     for(i=0; i<nNew && !CORRUPT_DB; i++){
  6627   6627       u8 *pCell = pCArray->apCell[i+iNew];
  6628         -    int iOff = get2byte(&pPg->aCellIdx[i*2]);
         6628  +    int iOff = get2byteAligned(&pPg->aCellIdx[i*2]);
  6629   6629       if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){
  6630   6630         pCell = &pTmp[pCell - aData];
  6631   6631       }
  6632   6632       assert( 0==memcmp(pCell, &aData[iOff],
  6633   6633               pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) );
  6634   6634     }
  6635   6635   #endif
................................................................................
  7123   7123       ** first.
  7124   7124       */
  7125   7125       memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*limit);
  7126   7126       if( pOld->nOverflow>0 ){
  7127   7127         memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow);
  7128   7128         limit = pOld->aiOvfl[0];
  7129   7129         for(j=0; j<limit; j++){
  7130         -        b.apCell[b.nCell] = aData + (maskPage & get2byte(piCell));
         7130  +        b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell));
  7131   7131           piCell += 2;
  7132   7132           b.nCell++;
  7133   7133         }
  7134   7134         for(k=0; k<pOld->nOverflow; k++){
  7135   7135           assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */
  7136   7136           b.apCell[b.nCell] = pOld->apOvfl[k];
  7137   7137           b.nCell++;
  7138   7138         }
  7139   7139       }
  7140   7140       piEnd = aData + pOld->cellOffset + 2*pOld->nCell;
  7141   7141       while( piCell<piEnd ){
  7142   7142         assert( b.nCell<nMaxCells );
  7143         -      b.apCell[b.nCell] = aData + (maskPage & get2byte(piCell));
         7143  +      b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell));
  7144   7144         piCell += 2;
  7145   7145         b.nCell++;
  7146   7146       }
  7147   7147   
  7148   7148       cntOld[i] = b.nCell;
  7149   7149       if( i<nOld-1 && !leafData){
  7150   7150         u16 sz = (u16)szNew[i];
................................................................................
  9101   9101       nCell = get2byte(&data[hdr+3]);
  9102   9102       /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page
  9103   9103       ** immediately follows the b-tree page header. */
  9104   9104       cellStart = hdr + 12 - 4*pPage->leaf;
  9105   9105       /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte
  9106   9106       ** integer offsets to the cell contents. */
  9107   9107       for(i=0; i<nCell; i++){
  9108         -      int pc = get2byte(&data[cellStart+i*2]);
         9108  +      int pc = get2byteAligned(&data[cellStart+i*2]);
  9109   9109         u32 size = 65536;
  9110   9110         if( pc<=usableSize-4 ){
  9111   9111           size = pPage->xCellSize(pPage, &data[pc]);
  9112   9112         }
  9113   9113         if( (int)(pc+size-1)>=usableSize ){
  9114   9114           pCheck->zPfx = 0;
  9115   9115           checkAppendMsg(pCheck,

Changes to src/btreeInt.h.

   687    687   /*
   688    688   ** Routines to read or write two- and four-byte big-endian integer values.
   689    689   */
   690    690   #define get2byte(x)   ((x)[0]<<8 | (x)[1])
   691    691   #define put2byte(p,v) ((p)[0] = (u8)((v)>>8), (p)[1] = (u8)(v))
   692    692   #define get4byte sqlite3Get4byte
   693    693   #define put4byte sqlite3Put4byte
          694  +
          695  +/*
          696  +** get2byteAligned(), unlike get2byte(), requires that its argument point to a
          697  +** two-byte aligned address.  get2byteAligned() is only used for accessing the
          698  +** cell addresses in a btree header.
          699  +*/
          700  +#if SQLITE_BYTEORDER==4321
          701  +# define get2byteAligned(x)  (*(u16*)(x))
          702  +#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4008000
          703  +# define get2byteAligned(x)  __builtin_bswap16(*(u16*)(x))
          704  +#else
          705  +# define get2byteAligned(x)  ((x)[0]<<8 | (x)[1])
          706  +#endif

Changes to src/sqliteInt.h.

    55     55   #ifndef SQLITE_DISABLE_LFS
    56     56   # define _LARGE_FILE       1
    57     57   # ifndef _FILE_OFFSET_BITS
    58     58   #   define _FILE_OFFSET_BITS 64
    59     59   # endif
    60     60   # define _LARGEFILE_SOURCE 1
    61     61   #endif
           62  +
           63  +/* What version of GCC is being used.  0 means GCC is not being used */
           64  +#ifdef __GNUC__
           65  +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
           66  +#else
           67  +# define GCC_VERSION 0
           68  +#endif
    62     69   
    63     70   /* Needed for various definitions... */
    64     71   #if defined(__GNUC__) && !defined(_GNU_SOURCE)
    65     72   # define _GNU_SOURCE
    66     73   #endif
    67     74   
    68     75   #if defined(__OpenBSD__) && !defined(_BSD_SOURCE)

Changes to src/util.c.

  1074   1074   }
  1075   1075   
  1076   1076   
  1077   1077   /*
  1078   1078   ** Read or write a four-byte big-endian integer value.
  1079   1079   */
  1080   1080   u32 sqlite3Get4byte(const u8 *p){
         1081  +#if SQLITE_BYTEORDER==4321
         1082  +  u32 x;
         1083  +  memcpy(&x,p,4);
         1084  +  return x;
         1085  +#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__)
         1086  +  u32 x;
         1087  +  memcpy(&x,p,4);
         1088  +  return __builtin_bswap32(x);
         1089  +#else
  1081   1090     testcase( p[0]&0x80 );
  1082   1091     return ((unsigned)p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
         1092  +#endif
  1083   1093   }
  1084   1094   void sqlite3Put4byte(unsigned char *p, u32 v){
         1095  +#if SQLITE_BYTEORDER==4321
         1096  +  memcpy(p,&v,4);
         1097  +#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__)
         1098  +  u32 x = __builtin_bswap32(v);
         1099  +  memcpy(p,&x,4);
         1100  +#else
  1085   1101     p[0] = (u8)(v>>24);
  1086   1102     p[1] = (u8)(v>>16);
  1087   1103     p[2] = (u8)(v>>8);
  1088   1104     p[3] = (u8)v;
         1105  +#endif
  1089   1106   }
  1090   1107   
  1091   1108   
  1092   1109   
  1093   1110   /*
  1094   1111   ** Translate a single byte of Hex into an integer.
  1095   1112   ** This routine only works if h really is a valid hexadecimal