SQLite

Check-in [6d1e2372fe]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:The sqlite_stat2.cnt field is parsed if it is present. But it is not yet used. A large comment added to analyze.c to explain the format of the ANALYZE system tables.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | query-planner-tweaks
Files: files | file ages | folders
SHA1: 6d1e2372fe73f4c04561108aac6bc8c95f6e7a1a
User & Date: drh 2011-08-06 19:48:53.187
Context
2011-08-07
00:21
The ANALYZE command adds the sqlite_stat2.cnt column if it does not already exist. (check-in: 794fde6f91 user: drh tags: query-planner-tweaks)
2011-08-06
19:48
The sqlite_stat2.cnt field is parsed if it is present. But it is not yet used. A large comment added to analyze.c to explain the format of the ANALYZE system tables. (check-in: 6d1e2372fe user: drh tags: query-planner-tweaks)
02:03
Merge together the fork in the query-planner-tweaks branch. (check-in: 2daab6bd42 user: drh tags: query-planner-tweaks)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/analyze.c.
1
2
3
4
5
6
7
8
9
10
11
12





























































































13
14
15
16
17
18
19
/*
** 2005 July 8
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains code associated with the ANALYZE command.





























































































*/
#ifndef SQLITE_OMIT_ANALYZE
#include "sqliteInt.h"

/*
** This routine generates code that opens the sqlite_stat1 table for
** writing with cursor iStatCur. If the library was built with the












>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
/*
** 2005 July 8
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains code associated with the ANALYZE command.
**
** The ANALYZE command gather statistics about the content of tables
** and indices.  These statistics are made available to the query planner
** to help it make better decisions about the best way to implement a
** query.
**
** Two system tables are created as follows:
**
**    CREATE TABLE sqlite_stat1(tbl, idx, stat);
**    CREATE TABLE sqlite_stat2(tbl, idx, sampleno, sample, cnt);
**
** Additional tables might be added in future releases of SQLite.
** The sqlite_stat2 table is only created and used if SQLite is
** compiled with SQLITE_ENABLE_STAT2.  Older versions of SQLite
** omit the sqlite_stat2.cnt column.  Newer versions of SQLite are
** able to use older versions of the stat2 table that lack the cnt
** column.
**
** Format of sqlite_stat1:
**
** There is normally one row per index, with the index identified by the
** name in the idx column.  The tbl column is the name of the table to
** which the index belongs.  In each such row, the stat column will be
** a string consisting of a list of integers.  The first integer in this
** list is the number of rows in the index and in the table.  The second
** integer is the average number of rows in the index that have the same
** value in the first column of the index.  The third integer is the average
** number of rows in the index that have the same value for the first two
** columns.  The N-th integer (for N>1) is the average number of rows in 
** the index which have the same value for the first N-1 columns.  For
** a K-column index, there will be K+1 integers in the stat column.  If
** the index is unique, then the last integer will be 1.
**
** The list of integers in the stat column can optionally be followed
** by the keyword "unordered".  The "unordered" keyword, if it is present,
** must be separated from the last integer by a single space.  If the
** "unordered" keyword is present, then the query planner assumes that
** the index is unordered and will not use the index for a range query.
** 
** If the sqlite_stat1.idx column is NULL, then the sqlite_stat1.stat
** column contains a single integer which is the (estimated) number of
** rows in the table identified by sqlite_stat1.tbl.
**
** Format of sqlite_stat2:
**
** The sqlite_stat2 is only created and is only used if SQLite is compiled
** with SQLITE_ENABLE_STAT2.  The "stat2" table contains additional information
** about the key distribution within an index.  The index is identified by
** the "idx" column and the "tbl" column is the name of the table to which
** the index belongs.  There are usually multiple rows in the sqlite_stat2
** table for each index.
**
** The sqlite_stat2 entires for an index that have sampleno>=0 are
** sampled key values for the first column of the index taken at
** intervals along the index.  The sqlite_stat2.sample column holds
** the value of the key in the left-most column of the index.
**
** The samples are numbered from 0 to S-1
** where S is 10 by default.  The number of samples created by the
** ANALYZE command can be adjusted at compile-time using the
** SQLITE_INDEX_SAMPLES macro.  The maximum number of samples is
** SQLITE_MAX_SAMPLES, currently set to 100.  There are places in the
** code that use an unsigned character to count samples, so an upper
** bound on SQLITE_MAX_SAMPLES is 255.
**
** Suppose the index contains C rows.  And let the number
** of samples be S.  SQLite assumes that the samples are taken from the
** following rows for i between 0 and S-1:
**
**     rownumber = (i*C*2 + C)/(S*2)
**
** Conceptually, the index is divided into S bins and the sample is
** taken from the middle of each bin.  The ANALYZE will not attempt
** to populate sqlite_stat2 for an index that holds fewer than S*2
** entries.
**
** If the key value for a sample (the sqlite_stat2.sample column) is a 
** large string or blob, SQLite will only use the first 255 bytes of 
** that string or blob.
** 
** The sqlite_stat2.cnt column contains the number of entries in the
** index for which sqlite_stat2.sample matches the left-most column
** of the index.  In other words, sqlite_stat2.cnt holds the number of
** times the sqlite_stat2.sample value appears in the index..  Many 
** older versions of SQLite omit the sqlite_stat2.cnt column.
**
** If the sqlite_stat2.sampleno value is -1, then that row holds a first-
** column key that is a frequently used key in the index.  The
** sqlite_stat2.cnt column will hold the number of occurrances of that key.
** This information is useful to the query planner in cases where a
** large percentage of the rows in indexed field have one of a small
** handful of value but the balance of the rows in the index have
** distinct or nearly distinct keys.
*/
#ifndef SQLITE_OMIT_ANALYZE
#include "sqliteInt.h"

/*
** This routine generates code that opens the sqlite_stat1 table for
** writing with cursor iStatCur. If the library was built with the
578
579
580
581
582
583
584

585









586








587


588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606























607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631

632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665

666
667
668
669
670
671
672
673
674

675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706

707
708
709

710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
      pIndex->bUnordered = 1;
      break;
    }
  }
  return 0;
}


/*









** If the Index.aSample variable is not NULL, delete the aSample[] array








** and its contents.


*/
void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){
#ifdef SQLITE_ENABLE_STAT2
  if( pIdx->aSample ){
    int j;
    for(j=0; j<pIdx->nSample; j++){
      IndexSample *p = &pIdx->aSample[j];
      if( p->eType==SQLITE_TEXT || p->eType==SQLITE_BLOB ){
        sqlite3DbFree(db, p->u.z);
      }
    }
    sqlite3DbFree(db, pIdx->aSample);
  }
#else
  UNUSED_PARAMETER(db);
  UNUSED_PARAMETER(pIdx);
#endif
}
























/*
** Load the content of the sqlite_stat1 and sqlite_stat2 tables. The
** contents of sqlite_stat1 are used to populate the Index.aiRowEst[]
** arrays. The contents of sqlite_stat2 are used to populate the
** Index.aSample[] arrays.
**
** If the sqlite_stat1 table is not present in the database, SQLITE_ERROR
** is returned. In this case, even if SQLITE_ENABLE_STAT2 was defined 
** during compilation and the sqlite_stat2 table is present, no data is 
** read from it.
**
** If SQLITE_ENABLE_STAT2 was defined during compilation and the 
** sqlite_stat2 table is not present in the database, SQLITE_ERROR is
** returned. However, in this case, data is read from the sqlite_stat1
** table (if it is present) before returning.
**
** If an OOM error occurs, this function always sets db->mallocFailed.
** This means if the caller does not care about other errors, the return
** code may be ignored.
*/
int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
  analysisInfo sInfo;
  HashElem *i;
  char *zSql;
  int rc;


  assert( iDb>=0 && iDb<db->nDb );
  assert( db->aDb[iDb].pBt!=0 );

  /* Clear any prior statistics */
  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
  for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
    Index *pIdx = sqliteHashData(i);
    sqlite3DefaultRowEst(pIdx);
    sqlite3DeleteIndexSamples(db, pIdx);
    pIdx->aSample = 0;
    pIdx->nSample = 0;
  }

  /* Check to make sure the sqlite_stat1 table exists */
  sInfo.db = db;
  sInfo.zDatabase = db->aDb[iDb].zName;
  if( sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase)==0 ){
    return SQLITE_ERROR;
  }

  /* Load new statistics out of the sqlite_stat1 table */
  zSql = sqlite3MPrintf(db, 
      "SELECT tbl, idx, stat FROM %Q.sqlite_stat1", sInfo.zDatabase);
  if( zSql==0 ){
    rc = SQLITE_NOMEM;
  }else{
    rc = sqlite3_exec(db, zSql, analysisLoader, &sInfo, 0);
    sqlite3DbFree(db, zSql);
  }


  /* Load the statistics from the sqlite_stat2 table. */
#ifdef SQLITE_ENABLE_STAT2

  if( rc==SQLITE_OK && !sqlite3FindTable(db, "sqlite_stat2", sInfo.zDatabase) ){
    rc = SQLITE_ERROR;
  }
  if( rc==SQLITE_OK ){
    sqlite3_stmt *pStmt = 0;

    zSql = sqlite3MPrintf(db, 
        "SELECT idx, sampleno, sample FROM %Q.sqlite_stat2"
        " ORDER BY rowid DESC", sInfo.zDatabase);

    if( !zSql ){
      rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
      sqlite3DbFree(db, zSql);
    }

    if( rc==SQLITE_OK ){
      while( sqlite3_step(pStmt)==SQLITE_ROW ){
        char *zIndex;   /* Index name */
        Index *pIdx;    /* Pointer to the index object */
        int iSample;
        int eType;
        IndexSample *pSample;

        zIndex = (char *)sqlite3_column_text(pStmt, 0);
        if( zIndex==0 ) continue;
        pIdx = sqlite3FindIndex(db, zIndex, sInfo.zDatabase);
        if( pIdx==0 ) continue;
        iSample = sqlite3_column_int(pStmt, 1);
        if( iSample>=SQLITE_MAX_SAMPLES || iSample<0 ) continue;
        if( pIdx->nSample<=iSample ){
          IndexSample *pNew;
          int sz = sizeof(IndexSample)*(iSample+1);
          pNew = (IndexSample*)sqlite3Realloc(pIdx->aSample, sz);
          if( pNew==0 ){
            db->mallocFailed = 1;
            break;
          }
          pIdx->aSample = pNew;
          pIdx->nSample = iSample+1;
        }

        eType = sqlite3_column_type(pStmt, 2);
        pSample = &pIdx->aSample[iSample];
        pSample->eType = (u8)eType;

        if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
          pSample->u.r = sqlite3_column_double(pStmt, 2);
        }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){
          const char *z = (const char *)(
             (eType==SQLITE_BLOB) ?
              sqlite3_column_blob(pStmt, 2):
              sqlite3_column_text(pStmt, 2)
          );
          int n = sqlite3_column_bytes(pStmt, 2);
          if( n>24 ) n = 24;
          pSample->nByte = (u8)n;
          if( n < 1){
            pSample->u.z = 0;
          }else{
            pSample->u.z = sqlite3DbStrNDup(0, z, n);
            if( pSample->u.z==0 ){
              db->mallocFailed = 1;







>

>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
|
>
>



|
<
<
|
<
<
<
<
<
<






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




|




















>










<
<





|
















>
|






|
|
>




















|
|
|
<
<
<
<
<
|
|
<

>

<

>









|







671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704


705






706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770


771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826





827
828

829
830
831

832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
      pIndex->bUnordered = 1;
      break;
    }
  }
  return 0;
}

#if SQLITE_ENABLE_STAT2
/*
** Delete an array of IndexSample objects
*/
static void deleteIndexSampleArray(
  sqlite3 *db,                 /* The database connection */
  IndexSampleArray *pArray     /* Array of IndexSample objects */
){
  int j;
  if( pArray->a==0 ) return;
  for(j=0; j<pArray->n; j++){
    IndexSample *p = &pArray->a[j];
    if( p->eType==SQLITE_TEXT || p->eType==SQLITE_BLOB ){
      sqlite3_free(p->u.z);
    }
  }
  sqlite3_free(pArray->a);
  memset(pArray, 0, sizeof(*pArray));
}
#endif

/*
** Delete the sample and common-key arrays from the index.
*/
void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){
#ifdef SQLITE_ENABLE_STAT2
  deleteIndexSampleArray(db, &pIdx->sample);


  deleteIndexSampleArray(db, &pIdx->comkey);






#else
  UNUSED_PARAMETER(db);
  UNUSED_PARAMETER(pIdx);
#endif
}

#ifdef SQLITE_ENABLE_STAT2
/*
** Enlarge an array of IndexSample objects.
*/
static IndexSample *allocIndexSample(
  sqlite3 *db,              /* Database connection to malloc against */
  IndexSampleArray *pArray, /* The array to enlarge */
  int i                     /* Return this element */
){
  IndexSample *p;
  if( i>=pArray->nAlloc ){
    int szNew = i+1;
    p = (IndexSample*)sqlite3_realloc(pArray->a, szNew*sizeof(IndexSample));
    if( p==0 ) return 0;
    pArray->a = p;
    memset(&pArray->a[pArray->n], 0, (szNew-(pArray->n))*sizeof(IndexSample));
    pArray->nAlloc = szNew;
  }
  if( i>=pArray->n ) pArray->n = i+1;
  return &pArray->a[i];
}
#endif

/*
** Load the content of the sqlite_stat1 and sqlite_stat2 tables. The
** contents of sqlite_stat1 are used to populate the Index.aiRowEst[]
** arrays. The contents of sqlite_stat2 are used to populate the
** Index.sample and Index.comkey arrays.
**
** If the sqlite_stat1 table is not present in the database, SQLITE_ERROR
** is returned. In this case, even if SQLITE_ENABLE_STAT2 was defined 
** during compilation and the sqlite_stat2 table is present, no data is 
** read from it.
**
** If SQLITE_ENABLE_STAT2 was defined during compilation and the 
** sqlite_stat2 table is not present in the database, SQLITE_ERROR is
** returned. However, in this case, data is read from the sqlite_stat1
** table (if it is present) before returning.
**
** If an OOM error occurs, this function always sets db->mallocFailed.
** This means if the caller does not care about other errors, the return
** code may be ignored.
*/
int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
  analysisInfo sInfo;
  HashElem *i;
  char *zSql;
  int rc;
  Table *pTab;    /* Stat1 or Stat2 table */

  assert( iDb>=0 && iDb<db->nDb );
  assert( db->aDb[iDb].pBt!=0 );

  /* Clear any prior statistics */
  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
  for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
    Index *pIdx = sqliteHashData(i);
    sqlite3DefaultRowEst(pIdx);
    sqlite3DeleteIndexSamples(db, pIdx);


  }

  /* Check to make sure the sqlite_stat1 table exists */
  sInfo.db = db;
  sInfo.zDatabase = db->aDb[iDb].zName;
  if( (pTab=sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase))==0 ){
    return SQLITE_ERROR;
  }

  /* Load new statistics out of the sqlite_stat1 table */
  zSql = sqlite3MPrintf(db, 
      "SELECT tbl, idx, stat FROM %Q.sqlite_stat1", sInfo.zDatabase);
  if( zSql==0 ){
    rc = SQLITE_NOMEM;
  }else{
    rc = sqlite3_exec(db, zSql, analysisLoader, &sInfo, 0);
    sqlite3DbFree(db, zSql);
  }


  /* Load the statistics from the sqlite_stat2 table. */
#ifdef SQLITE_ENABLE_STAT2
  if( rc==SQLITE_OK 
    && (pTab=sqlite3FindTable(db, "sqlite_stat2", sInfo.zDatabase))==0 ){
    rc = SQLITE_ERROR;
  }
  if( rc==SQLITE_OK ){
    sqlite3_stmt *pStmt = 0;

    zSql = sqlite3MPrintf(db, 
        "SELECT idx, sampleno, sample, %s FROM %Q.sqlite_stat2"
        " ORDER BY rowid DESC",
        pTab->nCol>=5 ? "cnt" : "0", sInfo.zDatabase);
    if( !zSql ){
      rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
      sqlite3DbFree(db, zSql);
    }

    if( rc==SQLITE_OK ){
      while( sqlite3_step(pStmt)==SQLITE_ROW ){
        char *zIndex;   /* Index name */
        Index *pIdx;    /* Pointer to the index object */
        int iSample;
        int eType;
        IndexSample *pSample;

        zIndex = (char *)sqlite3_column_text(pStmt, 0);
        if( zIndex==0 ) continue;
        pIdx = sqlite3FindIndex(db, zIndex, sInfo.zDatabase);
        if( pIdx==0 ) continue;
        iSample = sqlite3_column_int(pStmt, 1);
        if( iSample>=SQLITE_MAX_SAMPLES ) continue;
        if( iSample<0 ){
          pSample = allocIndexSample(db, &pIdx->comkey, pIdx->comkey.n);





        }else{
          pSample = allocIndexSample(db, &pIdx->sample, iSample);

        }
        if( pSample==0 ) break;
        eType = sqlite3_column_type(pStmt, 2);

        pSample->eType = (u8)eType;
        pSample->nCopy = sqlite3_column_int(pStmt, 4);
        if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
          pSample->u.r = sqlite3_column_double(pStmt, 2);
        }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){
          const char *z = (const char *)(
             (eType==SQLITE_BLOB) ?
              sqlite3_column_blob(pStmt, 2):
              sqlite3_column_text(pStmt, 2)
          );
          int n = sqlite3_column_bytes(pStmt, 2);
          if( n>255 ) n = 255;
          pSample->nByte = (u8)n;
          if( n < 1){
            pSample->u.z = 0;
          }else{
            pSample->u.z = sqlite3DbStrNDup(0, z, n);
            if( pSample->u.z==0 ){
              db->mallocFailed = 1;
Changes to src/sqliteInt.h.
611
612
613
614
615
616
617

618
619
620
621
622
623
624
typedef struct FKey FKey;
typedef struct FuncDestructor FuncDestructor;
typedef struct FuncDef FuncDef;
typedef struct FuncDefHash FuncDefHash;
typedef struct IdList IdList;
typedef struct Index Index;
typedef struct IndexSample IndexSample;

typedef struct KeyClass KeyClass;
typedef struct KeyInfo KeyInfo;
typedef struct Lookaside Lookaside;
typedef struct LookasideSlot LookasideSlot;
typedef struct Module Module;
typedef struct NameContext NameContext;
typedef struct Parse Parse;







>







611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
typedef struct FKey FKey;
typedef struct FuncDestructor FuncDestructor;
typedef struct FuncDef FuncDef;
typedef struct FuncDefHash FuncDefHash;
typedef struct IdList IdList;
typedef struct Index Index;
typedef struct IndexSample IndexSample;
typedef struct IndexSampleArray IndexSampleArray;
typedef struct KeyClass KeyClass;
typedef struct KeyInfo KeyInfo;
typedef struct Lookaside Lookaside;
typedef struct LookasideSlot LookasideSlot;
typedef struct Module Module;
typedef struct NameContext NameContext;
typedef struct Parse Parse;
1446
1447
1448
1449
1450
1451
1452























1453
1454
1455
1456
1457
1458
1459
#define UNPACKED_NEED_FREE     0x0001  /* Memory is from sqlite3Malloc() */
#define UNPACKED_NEED_DESTROY  0x0002  /* apMem[]s should all be destroyed */
#define UNPACKED_IGNORE_ROWID  0x0004  /* Ignore trailing rowid on key1 */
#define UNPACKED_INCRKEY       0x0008  /* Make this key an epsilon larger */
#define UNPACKED_PREFIX_MATCH  0x0010  /* A prefix match is considered OK */
#define UNPACKED_PREFIX_SEARCH 0x0020  /* A prefix match is considered OK */
























/*
** Each SQL index is represented in memory by an
** instance of the following structure.
**
** The columns of the table that are to be indexed are described
** by the aiColumn[] field of this structure.  For example, suppose
** we have the following table and index:







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
#define UNPACKED_NEED_FREE     0x0001  /* Memory is from sqlite3Malloc() */
#define UNPACKED_NEED_DESTROY  0x0002  /* apMem[]s should all be destroyed */
#define UNPACKED_IGNORE_ROWID  0x0004  /* Ignore trailing rowid on key1 */
#define UNPACKED_INCRKEY       0x0008  /* Make this key an epsilon larger */
#define UNPACKED_PREFIX_MATCH  0x0010  /* A prefix match is considered OK */
#define UNPACKED_PREFIX_SEARCH 0x0020  /* A prefix match is considered OK */

/*
** Each sample stored in the sqlite_stat2 table is represented in memory 
** using a structure of this type.
*/
struct IndexSample {
  union {
    char *z;        /* Value if eType is SQLITE_TEXT or SQLITE_BLOB */
    double r;       /* Value if eType is SQLITE_FLOAT or SQLITE_INTEGER */
  } u;
  u8 eType;         /* SQLITE_NULL, SQLITE_INTEGER ... etc. */
  u8 nByte;         /* Size in byte of text or blob. */
  u32 nCopy;        /* How many copies of this sample are in the database */
};

/*
** An array of IndexSample elements is as follows:
*/
struct IndexSampleArray {
  u16 n;            /* Number of elements in the array */
  u16 nAlloc;       /* Space allocated to a[] */
  IndexSample *a;   /* The samples */
};

/*
** Each SQL index is represented in memory by an
** instance of the following structure.
**
** The columns of the table that are to be indexed are described
** by the aiColumn[] field of this structure.  For example, suppose
** we have the following table and index:
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
  int *aiColumn;   /* Which columns are used by this index.  1st is 0 */
  unsigned *aiRowEst; /* Result of ANALYZE: Est. rows selected by each column */
  Table *pTable;   /* The SQL table being indexed */
  int tnum;        /* Page containing root of this index in database file */
  u8 onError;      /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */
  u8 autoIndex;    /* True if is automatically created (ex: by UNIQUE) */
  u8 bUnordered;   /* Use this index for == or IN queries only */
  u8 nSample;      /* Number of slots in aSample[] */
  char *zColAff;   /* String defining the affinity of each column */
  Index *pNext;    /* The next index associated with the same table */
  Schema *pSchema; /* Schema containing this index */
  u8 *aSortOrder;  /* Array of size Index.nColumn. True==DESC, False==ASC */
  char **azColl;   /* Array of collation sequence names for index */
  IndexSample *aSample;    /* Array of SQLITE_INDEX_SAMPLES samples */
};

/*
** Each sample stored in the sqlite_stat2 table is represented in memory 
** using a structure of this type.
*/
struct IndexSample {
  union {
    char *z;        /* Value if eType is SQLITE_TEXT or SQLITE_BLOB */
    double r;       /* Value if eType is SQLITE_FLOAT or SQLITE_INTEGER */
  } u;
  u8 eType;         /* SQLITE_NULL, SQLITE_INTEGER ... etc. */
  u8 nByte;         /* Size in byte of text or blob. */
};

/*
** Each token coming out of the lexer is an instance of
** this structure.  Tokens are also used as part of an expression.
**
** Note if Token.z==0 then Token.dyn and Token.n are undefined and







<





|
<
|
<
<
<
<
|
|
<
<
<
<
<







1506
1507
1508
1509
1510
1511
1512

1513
1514
1515
1516
1517
1518

1519




1520
1521





1522
1523
1524
1525
1526
1527
1528
  int *aiColumn;   /* Which columns are used by this index.  1st is 0 */
  unsigned *aiRowEst; /* Result of ANALYZE: Est. rows selected by each column */
  Table *pTable;   /* The SQL table being indexed */
  int tnum;        /* Page containing root of this index in database file */
  u8 onError;      /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */
  u8 autoIndex;    /* True if is automatically created (ex: by UNIQUE) */
  u8 bUnordered;   /* Use this index for == or IN queries only */

  char *zColAff;   /* String defining the affinity of each column */
  Index *pNext;    /* The next index associated with the same table */
  Schema *pSchema; /* Schema containing this index */
  u8 *aSortOrder;  /* Array of size Index.nColumn. True==DESC, False==ASC */
  char **azColl;   /* Array of collation sequence names for index */
#ifdef SQLITE_ENABLE_STAT2

  IndexSampleArray sample;  /* Sampled histogram for the first column */




  IndexSampleArray comkey;  /* The most common keys */
#endif





};

/*
** Each token coming out of the lexer is an instance of
** this structure.  Tokens are also used as part of an expression.
**
** Note if Token.z==0 then Token.dyn and Token.n are undefined and
Changes to src/where.c.
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
  */
  bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
}
#endif /* SQLITE_OMIT_VIRTUALTABLE */

/*
** Argument pIdx is a pointer to an index structure that has an array of
** pIdx->nSample evenly spaced samples of the first indexed column
** stored in Index.aSample. These samples divide the domain of values stored
** the index into (pIdx->nSample+1) regions.
** Region 0 contains all values less than the first sample value. Region
** 1 contains values between the first and second samples.  Region 2 contains
** values between samples 2 and 3.  And so on.  Region pIdx->nSample
** contains values larger than the last sample.
**
** If the index contains many duplicates of a single value, then it is
** possible that two or more adjacent samples can hold the same value.
** When that is the case, the smallest possible region code is returned
** when roundUp is false and the largest possible region code is returned
** when roundUp is true.







|
|
|


|







2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
  */
  bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
}
#endif /* SQLITE_OMIT_VIRTUALTABLE */

/*
** Argument pIdx is a pointer to an index structure that has an array of
** pIdx->sample.n evenly spaced samples of the first indexed column
** stored in Index.sample. These samples divide the domain of values stored
** the index into (pIdx->sample.n+1) regions.
** Region 0 contains all values less than the first sample value. Region
** 1 contains values between the first and second samples.  Region 2 contains
** values between samples 2 and 3.  And so on.  Region pIdx->sample.n
** contains values larger than the last sample.
**
** If the index contains many duplicates of a single value, then it is
** possible that two or more adjacent samples can hold the same value.
** When that is the case, the smallest possible region code is returned
** when roundUp is false and the largest possible region code is returned
** when roundUp is true.
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
  Index *pIdx,                /* Index to consider domain of */
  sqlite3_value *pVal,        /* Value to consider */
  int roundUp,                /* Return largest valid region if true */
  int *piRegion               /* OUT: Region of domain in which value lies */
){
  assert( roundUp==0 || roundUp==1 );
  if( ALWAYS(pVal) ){
    IndexSample *aSample = pIdx->aSample;
    int nSample = pIdx->nSample;
    int i = 0;
    int eType = sqlite3_value_type(pVal);

    assert( nSample>0 );
    if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
      double r = sqlite3_value_double(pVal);
      for(i=0; i<nSample; i++){







|
|







2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
  Index *pIdx,                /* Index to consider domain of */
  sqlite3_value *pVal,        /* Value to consider */
  int roundUp,                /* Return largest valid region if true */
  int *piRegion               /* OUT: Region of domain in which value lies */
){
  assert( roundUp==0 || roundUp==1 );
  if( ALWAYS(pVal) ){
    IndexSample *aSample = pIdx->sample.a;
    int nSample = pIdx->sample.n;
    int i = 0;
    int eType = sqlite3_value_type(pVal);

    assert( nSample>0 );
    if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
      double r = sqlite3_value_double(pVal);
      for(i=0; i<nSample; i++){
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
        {
          c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
        }
        if( c-roundUp>=0 ) break;
      }
    }

    assert( i>=0 && i<=pIdx->nSample );
    *piRegion = i;
  }
  return SQLITE_OK;
}
#endif   /* #ifdef SQLITE_ENABLE_STAT2 */

/*







|







2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
        {
          c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
        }
        if( c-roundUp>=0 ) break;
      }
    }

    assert( i>=0 && i<=pIdx->sample.n );
    *piRegion = i;
  }
  return SQLITE_OK;
}
#endif   /* #ifdef SQLITE_ENABLE_STAT2 */

/*
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
  WhereTerm *pUpper,   /* Upper bound on the range. ex: "x<455" Might be NULL */
  int *piEst           /* OUT: Return value */
){
  int rc = SQLITE_OK;

#ifdef SQLITE_ENABLE_STAT2

  if( nEq==0 && p->aSample ){
    sqlite3_value *pLowerVal = 0;
    sqlite3_value *pUpperVal = 0;
    int iEst;
    int iLower = 0;
    int nSample = p->nSample;
    int iUpper = p->nSample;
    int roundUpUpper = 0;
    int roundUpLower = 0;
    u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;

    if( pLower ){
      Expr *pExpr = pLower->pExpr->pRight;
      rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal);







|




|
|







2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
  WhereTerm *pUpper,   /* Upper bound on the range. ex: "x<455" Might be NULL */
  int *piEst           /* OUT: Return value */
){
  int rc = SQLITE_OK;

#ifdef SQLITE_ENABLE_STAT2

  if( nEq==0 && p->sample.a ){
    sqlite3_value *pLowerVal = 0;
    sqlite3_value *pUpperVal = 0;
    int iEst;
    int iLower = 0;
    int nSample = p->sample.n;
    int iUpper = p->sample.n;
    int roundUpUpper = 0;
    int roundUpLower = 0;
    u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;

    if( pLower ){
      Expr *pExpr = pLower->pExpr->pRight;
      rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal);
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
      sqlite3ValueFree(pUpperVal);
      goto range_est_fallback;
    }else if( pLowerVal==0 ){
      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
      if( pLower ) iLower = iUpper/2;
    }else if( pUpperVal==0 ){
      rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
      if( pUpper ) iUpper = (iLower + p->nSample + 1)/2;
    }else{
      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
      if( rc==SQLITE_OK ){
        rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
      }
    }
    WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper));







|







2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
      sqlite3ValueFree(pUpperVal);
      goto range_est_fallback;
    }else if( pLowerVal==0 ){
      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
      if( pLower ) iLower = iUpper/2;
    }else if( pUpperVal==0 ){
      rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
      if( pUpper ) iUpper = (iLower + p->sample.n + 1)/2;
    }else{
      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
      if( rc==SQLITE_OK ){
        rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
      }
    }
    WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper));
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
){
  sqlite3_value *pRhs = 0;  /* VALUE on right-hand side of pTerm */
  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  u8 aff;                   /* Column affinity */
  int rc;                   /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */

  assert( p->aSample!=0 );
  assert( p->nSample>0 );
  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  if( pExpr ){
    rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
    if( rc ) goto whereEqualScanEst_cancel;
  }else{
    pRhs = sqlite3ValueNew(pParse->db);
  }
  if( pRhs==0 ) return SQLITE_NOTFOUND;
  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
  if( rc ) goto whereEqualScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  if( rc ) goto whereEqualScanEst_cancel;
  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
  if( iLower>=iUpper ){
    nRowEst = p->aiRowEst[0]/(p->nSample*3);
    if( nRowEst<*pnRow ) *pnRow = nRowEst;
  }else{
    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/p->nSample;
    *pnRow = nRowEst;
  }

whereEqualScanEst_cancel:
  sqlite3ValueFree(pRhs);
  return rc;
}







|
|














|


|







2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
){
  sqlite3_value *pRhs = 0;  /* VALUE on right-hand side of pTerm */
  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  u8 aff;                   /* Column affinity */
  int rc;                   /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */

  assert( p->sample.a!=0 );
  assert( p->sample.n>0 );
  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  if( pExpr ){
    rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
    if( rc ) goto whereEqualScanEst_cancel;
  }else{
    pRhs = sqlite3ValueNew(pParse->db);
  }
  if( pRhs==0 ) return SQLITE_NOTFOUND;
  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
  if( rc ) goto whereEqualScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  if( rc ) goto whereEqualScanEst_cancel;
  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
  if( iLower>=iUpper ){
    nRowEst = p->aiRowEst[0]/(p->sample.n*3);
    if( nRowEst<*pnRow ) *pnRow = nRowEst;
  }else{
    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/p->sample.n;
    *pnRow = nRowEst;
  }

whereEqualScanEst_cancel:
  sqlite3ValueFree(pRhs);
  return rc;
}
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
  u8 aff;                   /* Column affinity */
  int rc = SQLITE_OK;       /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */
  int nSpan = 0;            /* Number of histogram regions spanned */
  int nSingle = 0;          /* Histogram regions hit by a single value */
  int nNotFound = 0;        /* Count of values that are not constants */
  int i;                             /* Loop counter */
  int nSample = p->nSample;          /* Number of samples */
  u8 aSpan[SQLITE_MAX_SAMPLES+1];    /* Histogram regions that are spanned */
  u8 aSingle[SQLITE_MAX_SAMPLES+1];  /* Histogram regions hit once */

  assert( p->aSample!=0 );
  assert( nSample>0 );
  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  memset(aSpan, 0, nSample+1);
  memset(aSingle, 0, nSample+1);
  for(i=0; i<pList->nExpr; i++){
    sqlite3ValueFree(pVal);
    rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);







|



|







2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
  u8 aff;                   /* Column affinity */
  int rc = SQLITE_OK;       /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */
  int nSpan = 0;            /* Number of histogram regions spanned */
  int nSingle = 0;          /* Histogram regions hit by a single value */
  int nNotFound = 0;        /* Count of values that are not constants */
  int i;                             /* Loop counter */
  int nSample = p->sample.n;         /* Number of samples */
  u8 aSpan[SQLITE_MAX_SAMPLES+1];    /* Histogram regions that are spanned */
  u8 aSingle[SQLITE_MAX_SAMPLES+1];  /* Histogram regions hit once */

  assert( p->sample.a!=0 );
  assert( nSample>0 );
  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  memset(aSpan, 0, nSample+1);
  memset(aSingle, 0, nSample+1);
  for(i=0; i<pList->nExpr; i++){
    sqlite3ValueFree(pVal);
    rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
          /* "x IN (value, value, ...)" */
          nInMul *= pExpr->x.pList->nExpr;
        }
      }else if( pTerm->eOperator & WO_ISNULL ){
        wsFlags |= WHERE_COLUMN_NULL;
      }
#ifdef SQLITE_ENABLE_STAT2
      if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm;
#endif
      used |= pTerm->prereqRight;
    }

    /* Determine the value of estBound. */
    if( nEq<pProbe->nColumn && pProbe->bUnordered==0 ){
      int j = pProbe->aiColumn[nEq];







|







3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
          /* "x IN (value, value, ...)" */
          nInMul *= pExpr->x.pList->nExpr;
        }
      }else if( pTerm->eOperator & WO_ISNULL ){
        wsFlags |= WHERE_COLUMN_NULL;
      }
#ifdef SQLITE_ENABLE_STAT2
      if( nEq==0 && pProbe->sample.a ) pFirstTerm = pTerm;
#endif
      used |= pTerm->prereqRight;
    }

    /* Determine the value of estBound. */
    if( nEq<pProbe->nColumn && pProbe->bUnordered==0 ){
      int j = pProbe->aiColumn[nEq];
Changes to test/analyze8.test.
157
158
159
160
161
162
163


  execsql {
    UPDATE t1 SET a=b;
    UPDATE t1 SET a=20 WHERE b>2;
    ANALYZE;
    SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1all' ORDER BY sampleno;
  }
} {2 2 20 38 20 38 20 38 20 38 20 38 20 38 20 38 20 38 20 38}









>
>
157
158
159
160
161
162
163
164
165
  execsql {
    UPDATE t1 SET a=b;
    UPDATE t1 SET a=20 WHERE b>2;
    ANALYZE;
    SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1all' ORDER BY sampleno;
  }
} {2 2 20 38 20 38 20 38 20 38 20 38 20 38 20 38 20 38 20 38}

finish_test