SQLite

Check-in [82fcd54a59]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Use compiler intrinsic functions (when available) for byteswapping in RTREE.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 82fcd54a5941c20895ffc22d8009c1ebdae44eda
User & Date: drh 2017-02-01 15:24:32.835
Context
2017-02-01
15:49
Precompute the nDim2 value in the Rtree object and use that to make loops over coordinates faster. (check-in: f1f3c8cc73 user: drh tags: trunk)
15:24
Use compiler intrinsic functions (when available) for byteswapping in RTREE. (check-in: 82fcd54a59 user: drh tags: trunk)
15:19
Fix the build by making the OPFLAG_ISNOOP macro available unconditionally. (check-in: 510933cb24 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/rtree/rtree.c.
358
359
360
361
362
363
364


























365

































366
367
368
369
370
371
372

373
374



375
376
377
378
379
380
381
382
383
384
385
386
387
388














389
390
391
392
393
394
395
396
397
398

399
400
401
402
403
404
405
406
407
408
409
410
411
412

413
414










415
416
417
418
419

420
421
422










423
424
425
426
427
428
429
430

431
432
433
434
435
436
437
#ifndef MAX
# define MAX(x,y) ((x) < (y) ? (y) : (x))
#endif
#ifndef MIN
# define MIN(x,y) ((x) > (y) ? (y) : (x))
#endif



























/*

































** Functions to deserialize a 16 bit integer, 32 bit real number and
** 64 bit integer. The deserialized value is returned.
*/
static int readInt16(u8 *p){
  return (p[0]<<8) + p[1];
}
static void readCoord(u8 *p, RtreeCoord *pCoord){

#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234
  memcpy(&pCoord->u, p, 4);



  pCoord->u = ((pCoord->u>>24)&0xff)|((pCoord->u>>8)&0xff00)|
              ((pCoord->u&0xff)<<24)|((pCoord->u&0xff00)<<8);
#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321
  memcpy(&pCoord->u, p, 4);
#else
  pCoord->u = (
    (((u32)p[0]) << 24) + 
    (((u32)p[1]) << 16) + 
    (((u32)p[2]) <<  8) + 
    (((u32)p[3]) <<  0)
  );
#endif
}
static i64 readInt64(u8 *p){














  return (
    (((i64)p[0]) << 56) + 
    (((i64)p[1]) << 48) + 
    (((i64)p[2]) << 40) + 
    (((i64)p[3]) << 32) + 
    (((i64)p[4]) << 24) + 
    (((i64)p[5]) << 16) + 
    (((i64)p[6]) <<  8) + 
    (((i64)p[7]) <<  0)
  );

}

/*
** Functions to serialize a 16 bit integer, 32 bit real number and
** 64 bit integer. The value returned is the number of bytes written
** to the argument buffer (always 2, 4 and 8 respectively).
*/
static int writeInt16(u8 *p, int i){
  p[0] = (i>> 8)&0xFF;
  p[1] = (i>> 0)&0xFF;
  return 2;
}
static int writeCoord(u8 *p, RtreeCoord *pCoord){
  u32 i;

  assert( sizeof(RtreeCoord)==4 );
  assert( sizeof(u32)==4 );










  i = pCoord->u;
  p[0] = (i>>24)&0xFF;
  p[1] = (i>>16)&0xFF;
  p[2] = (i>> 8)&0xFF;
  p[3] = (i>> 0)&0xFF;

  return 4;
}
static int writeInt64(u8 *p, i64 i){










  p[0] = (i>>56)&0xFF;
  p[1] = (i>>48)&0xFF;
  p[2] = (i>>40)&0xFF;
  p[3] = (i>>32)&0xFF;
  p[4] = (i>>24)&0xFF;
  p[5] = (i>>16)&0xFF;
  p[6] = (i>> 8)&0xFF;
  p[7] = (i>> 0)&0xFF;

  return 8;
}

/*
** Increment the reference count of node p.
*/
static void nodeReference(RtreeNode *p){







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







>
|
|
>
>
>


|
|










>
>
>
>
>
>
>
>
>
>
>
>
>
>










>














>


>
>
>
>
>
>
>
>
>
>





>



>
>
>
>
>
>
>
>
>
>








>







358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
#ifndef MAX
# define MAX(x,y) ((x) < (y) ? (y) : (x))
#endif
#ifndef MIN
# define MIN(x,y) ((x) > (y) ? (y) : (x))
#endif

/* What version of GCC is being used.  0 means GCC is not being used */
#ifndef GCC_VERSION
#ifdef __GNUC__
# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
#else
# define GCC_VERSION 0
#endif
#endif

/* What version of CLANG is being used.  0 means CLANG is not being used */
#ifndef CLANG_VERSION
#if defined(__clang__) && !defined(_WIN32)
# define CLANG_VERSION \
            (__clang_major__*1000000+__clang_minor__*1000+__clang_patchlevel__)
#else
# define CLANG_VERSION 0
#endif
#endif

/* The testcase() macro should already be defined in the amalgamation.  If
** it is not, make it a no-op.
*/
#ifndef SQLITE_AMALGMATION
# define testcase(X)
#endif

/*
** Macros to determine whether the machine is big or little endian,
** and whether or not that determination is run-time or compile-time.
**
** For best performance, an attempt is made to guess at the byte-order
** using C-preprocessor macros.  If that is unsuccessful, or if
** -DSQLITE_RUNTIME_BYTEORDER=1 is set, then byte-order is determined
** at run-time.
*/
#ifndef SQLITE_BYTEORDER
#if (defined(i386)     || defined(__i386__)   || defined(_M_IX86) ||    \
     defined(__x86_64) || defined(__x86_64__) || defined(_M_X64)  ||    \
     defined(_M_AMD64) || defined(_M_ARM)     || defined(__x86)   ||    \
     defined(__arm__)) && !defined(SQLITE_RUNTIME_BYTEORDER)
# define SQLITE_BYTEORDER    1234
#endif
#if (defined(sparc)    || defined(__ppc__))  \
    && !defined(SQLITE_RUNTIME_BYTEORDER)
# define SQLITE_BYTEORDER    4321
#endif
# define SQLITE_BYTEORDER    0     /* 0 means "unknown at compile-time" */
#endif


/* What version of MSVC is being used.  0 means MSVC is not being used */
#ifndef MSVC_VERSION
#if defined(_MSC_VER)
# define MSVC_VERSION _MSC_VER
#else
# define MSVC_VERSION 0
#endif
#endif

/*
** Functions to deserialize a 16 bit integer, 32 bit real number and
** 64 bit integer. The deserialized value is returned.
*/
static int readInt16(u8 *p){
  return (p[0]<<8) + p[1];
}
static void readCoord(u8 *p, RtreeCoord *pCoord){
  assert( ((((char*)p) - (char*)0)&3)==0 );  /* p is always 4-byte aligned */
#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
  pCoord->u = _byteswap_ulong(*(u32*)p);
#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
  pCoord->u = __builtin_bswap32(*(u32*)p);
#elif SQLITE_BYTEORDER==1234
  pCoord->u = ((pCoord->u>>24)&0xff)|((pCoord->u>>8)&0xff00)|
              ((pCoord->u&0xff)<<24)|((pCoord->u&0xff00)<<8);
#elif SQLITE_BYTEORDER==4321
  pCoord->u = *(u32*)p;
#else
  pCoord->u = (
    (((u32)p[0]) << 24) + 
    (((u32)p[1]) << 16) + 
    (((u32)p[2]) <<  8) + 
    (((u32)p[3]) <<  0)
  );
#endif
}
static i64 readInt64(u8 *p){
  testcase( ((((char*)p) - (char*)0)&7)!=0 );  /* not always 8-byte aligned */
#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
  u64 x;
  memcpy(&x, p, 8);
  return (i64)_byteswap_uint64(x);
#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
  u64 x;
  memcpy(&x, p, 8);
  return (i64)__builtin_bswap64(x);
#elif SQLITE_BYTEORDER==4321
  i64 x;
  memcpy(&x, p, 8);
  return x;
#else
  return (
    (((i64)p[0]) << 56) + 
    (((i64)p[1]) << 48) + 
    (((i64)p[2]) << 40) + 
    (((i64)p[3]) << 32) + 
    (((i64)p[4]) << 24) + 
    (((i64)p[5]) << 16) + 
    (((i64)p[6]) <<  8) + 
    (((i64)p[7]) <<  0)
  );
#endif
}

/*
** Functions to serialize a 16 bit integer, 32 bit real number and
** 64 bit integer. The value returned is the number of bytes written
** to the argument buffer (always 2, 4 and 8 respectively).
*/
static int writeInt16(u8 *p, int i){
  p[0] = (i>> 8)&0xFF;
  p[1] = (i>> 0)&0xFF;
  return 2;
}
static int writeCoord(u8 *p, RtreeCoord *pCoord){
  u32 i;
  assert( ((((char*)p) - (char*)0)&3)==0 );  /* p is always 4-byte aligned */
  assert( sizeof(RtreeCoord)==4 );
  assert( sizeof(u32)==4 );
#if SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
  i = __builtin_bswap32(pCoord->u);
  memcpy(p, &i, 4);
#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
  i = _byteswap_ulong(pCoord->u);
  memcpy(p, &i, 4);
#elif SQLITE_BYTEORDER==4321
  i = pCoord->u;
  memcpy(p, &i, 4);
#else
  i = pCoord->u;
  p[0] = (i>>24)&0xFF;
  p[1] = (i>>16)&0xFF;
  p[2] = (i>> 8)&0xFF;
  p[3] = (i>> 0)&0xFF;
#endif
  return 4;
}
static int writeInt64(u8 *p, i64 i){
  testcase( ((((char*)p) - (char*)0)&7)!=0 );  /* Not always 8-byte aligned */
#if SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
  i = (i64)__builtin_bswap64((u64)i);
  memcpy(p, &i, 8);
#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
  i = (i64)_byteswap_uint64((u64)i);
  memcpy(p, &i, 8);
#elif SQLITE_BYTEORDER==4321
  memcpy(p, &i, 8);
#else
  p[0] = (i>>56)&0xFF;
  p[1] = (i>>48)&0xFF;
  p[2] = (i>>40)&0xFF;
  p[3] = (i>>32)&0xFF;
  p[4] = (i>>24)&0xFF;
  p[5] = (i>>16)&0xFF;
  p[6] = (i>> 8)&0xFF;
  p[7] = (i>> 0)&0xFF;
#endif
  return 8;
}

/*
** Increment the reference count of node p.
*/
static void nodeReference(RtreeNode *p){
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
  pCoord = pCell->aCoord;
  do{
    readCoord(pData, &pCoord[ii]);
    readCoord(pData+4, &pCoord[ii+1]);
    pData += 8;
    ii += 2;
  }while( ii<pRtree->nDim*2 );
#if 0
  for(ii=0; ii<pRtree->nDim*2; ii++){
    readCoord(&pData[ii*4], &pCoord[ii]);
  }
#endif
}


/* Forward declaration for the function that does the work of
** the virtual table module xCreate() and xConnect() methods.
*/
static int rtreeInit(







<
<
<
<
<







851
852
853
854
855
856
857





858
859
860
861
862
863
864
  pCoord = pCell->aCoord;
  do{
    readCoord(pData, &pCoord[ii]);
    readCoord(pData+4, &pCoord[ii+1]);
    pData += 8;
    ii += 2;
  }while( ii<pRtree->nDim*2 );





}


/* Forward declaration for the function that does the work of
** the virtual table module xCreate() and xConnect() methods.
*/
static int rtreeInit(
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937












938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
** Convert raw bits from the on-disk RTree record into a coordinate value.
** The on-disk format is big-endian and needs to be converted for little-
** endian platforms.  The on-disk record stores integer coordinates if
** eInt is true and it stores 32-bit floating point records if eInt is
** false.  a[] is the four bytes of the on-disk record to be decoded.
** Store the results in "r".
**
** There are three versions of this macro, one each for little-endian and
** big-endian processors and a third generic implementation.  The endian-
** specific implementations are much faster and are preferred if the
** processor endianness is known at compile-time.  The SQLITE_BYTEORDER
** macro is part of sqliteInt.h and hence the endian-specific
** implementation will only be used if this module is compiled as part
** of the amalgamation.
*/












#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234
#define RTREE_DECODE_COORD(eInt, a, r) {                        \
    RtreeCoord c;    /* Coordinate decoded */                   \
    memcpy(&c.u,a,4);                                           \
    c.u = ((c.u>>24)&0xff)|((c.u>>8)&0xff00)|                   \
          ((c.u&0xff)<<24)|((c.u&0xff00)<<8);                   \
    r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321
#define RTREE_DECODE_COORD(eInt, a, r) {                        \
    RtreeCoord c;    /* Coordinate decoded */                   \
    memcpy(&c.u,a,4);                                           \
    r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#else
#define RTREE_DECODE_COORD(eInt, a, r) {                        \







|
<
<
<
|
<
<

>
>
>
>
>
>
>
>
>
>
>
>
|







|







1019
1020
1021
1022
1023
1024
1025
1026



1027


1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
** Convert raw bits from the on-disk RTree record into a coordinate value.
** The on-disk format is big-endian and needs to be converted for little-
** endian platforms.  The on-disk record stores integer coordinates if
** eInt is true and it stores 32-bit floating point records if eInt is
** false.  a[] is the four bytes of the on-disk record to be decoded.
** Store the results in "r".
**
** There are five versions of this macro.  The last one is generic.  The



** other four are various architectures-specific optimizations.


*/
#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
#define RTREE_DECODE_COORD(eInt, a, r) {                        \
    RtreeCoord c;    /* Coordinate decoded */                   \
    c.u = _byteswap_ulong(*(u32*)a);                            \
    r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
#define RTREE_DECODE_COORD(eInt, a, r) {                        \
    RtreeCoord c;    /* Coordinate decoded */                   \
    c.u = __builtin_bswap32(*(u32*)a);                          \
    r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#elif SQLITE_BYTEORDER==1234
#define RTREE_DECODE_COORD(eInt, a, r) {                        \
    RtreeCoord c;    /* Coordinate decoded */                   \
    memcpy(&c.u,a,4);                                           \
    c.u = ((c.u>>24)&0xff)|((c.u>>8)&0xff00)|                   \
          ((c.u&0xff)<<24)|((c.u&0xff00)<<8);                   \
    r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#elif SQLITE_BYTEORDER==4321
#define RTREE_DECODE_COORD(eInt, a, r) {                        \
    RtreeCoord c;    /* Coordinate decoded */                   \
    memcpy(&c.u,a,4);                                           \
    r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#else
#define RTREE_DECODE_COORD(eInt, a, r) {                        \
982
983
984
985
986
987
988

989
990
991
992
993
994
995
  if( pConstraint->op==RTREE_QUERY && pSearch->iLevel==1 ){
    pInfo->iRowid = readInt64(pCellData);
  }
  assert( nCoord>=2 && (nCoord&1)==0 );
  i = 0;
  do{
    pCellData += 8;

    RTREE_DECODE_COORD(eInt, pCellData, aCoord[i]);
    RTREE_DECODE_COORD(eInt, (pCellData+4), aCoord[i+1]);
    i+= 2;
  }while( i<nCoord );
  if( pConstraint->op==RTREE_MATCH ){
    rc = pConstraint->u.xGeom((sqlite3_rtree_geometry*)pInfo,
                              nCoord, aCoord, &i);







>







1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
  if( pConstraint->op==RTREE_QUERY && pSearch->iLevel==1 ){
    pInfo->iRowid = readInt64(pCellData);
  }
  assert( nCoord>=2 && (nCoord&1)==0 );
  i = 0;
  do{
    pCellData += 8;
    assert( ((((char*)pCellData) - (char*)0)&3)==0 );  /* 4-byte aligned */
    RTREE_DECODE_COORD(eInt, pCellData, aCoord[i]);
    RTREE_DECODE_COORD(eInt, (pCellData+4), aCoord[i+1]);
    i+= 2;
  }while( i<nCoord );
  if( pConstraint->op==RTREE_MATCH ){
    rc = pConstraint->u.xGeom((sqlite3_rtree_geometry*)pInfo,
                              nCoord, aCoord, &i);
1025
1026
1027
1028
1029
1030
1031

1032
1033
1034
1035
1036
1037
1038
  /* p->iCoord might point to either a lower or upper bound coordinate
  ** in a coordinate pair.  But make pCellData point to the lower bound.
  */
  pCellData += 8 + 4*(p->iCoord&0xfe);

  assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE 
      || p->op==RTREE_GT || p->op==RTREE_EQ );

  switch( p->op ){
    case RTREE_LE:
    case RTREE_LT:
    case RTREE_EQ:
      RTREE_DECODE_COORD(eInt, pCellData, val);
      /* val now holds the lower bound of the coordinate pair */
      if( p->u.rValue>=val ) return;







>







1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
  /* p->iCoord might point to either a lower or upper bound coordinate
  ** in a coordinate pair.  But make pCellData point to the lower bound.
  */
  pCellData += 8 + 4*(p->iCoord&0xfe);

  assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE 
      || p->op==RTREE_GT || p->op==RTREE_EQ );
  assert( ((((char*)pCellData) - (char*)0)&3)==0 );  /* 4-byte aligned */
  switch( p->op ){
    case RTREE_LE:
    case RTREE_LT:
    case RTREE_EQ:
      RTREE_DECODE_COORD(eInt, pCellData, val);
      /* val now holds the lower bound of the coordinate pair */
      if( p->u.rValue>=val ) return;
1065
1066
1067
1068
1069
1070
1071

1072
1073
1074
1075
1076
1077
1078
  int *peWithin              /* Adjust downward, as appropriate */
){
  RtreeDValue xN;      /* Coordinate value converted to a double */

  assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE 
      || p->op==RTREE_GT || p->op==RTREE_EQ );
  pCellData += 8 + p->iCoord*4;

  RTREE_DECODE_COORD(eInt, pCellData, xN);
  switch( p->op ){
    case RTREE_LE: if( xN <= p->u.rValue ) return;  break;
    case RTREE_LT: if( xN <  p->u.rValue ) return;  break;
    case RTREE_GE: if( xN >= p->u.rValue ) return;  break;
    case RTREE_GT: if( xN >  p->u.rValue ) return;  break;
    default:       if( xN == p->u.rValue ) return;  break;







>







1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
  int *peWithin              /* Adjust downward, as appropriate */
){
  RtreeDValue xN;      /* Coordinate value converted to a double */

  assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE 
      || p->op==RTREE_GT || p->op==RTREE_EQ );
  pCellData += 8 + p->iCoord*4;
  assert( ((((char*)pCellData) - (char*)0)&3)==0 );  /* 4-byte aligned */
  RTREE_DECODE_COORD(eInt, pCellData, xN);
  switch( p->op ){
    case RTREE_LE: if( xN <= p->u.rValue ) return;  break;
    case RTREE_LT: if( xN <  p->u.rValue ) return;  break;
    case RTREE_GE: if( xN >= p->u.rValue ) return;  break;
    case RTREE_GT: if( xN >  p->u.rValue ) return;  break;
    default:       if( xN == p->u.rValue ) return;  break;