/ Check-in [941647d1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:In fts4, store the total number of bytes of for all records in the table in the %_stat table.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | experimental
Files: files | file ages | folders
SHA1: 941647d121ac60e2eabc998cfe79b157fb918d7e
User & Date: dan 2010-10-27 10:55:54
Context
2010-10-27
16:52
Fix a buffer overread in fts3 that can occur if the database is corrupt. Closed-Leaf check-in: 84194c41 user: dan tags: experimental
10:55
In fts4, store the total number of bytes of for all records in the table in the %_stat table. check-in: 941647d1 user: dan tags: experimental
2010-10-26
18:42
Structural coverage tests for vdbeblob.c. Including experimental new API sqlite3_blob_reopen(). check-in: 97c6b261 user: dan tags: experimental
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3_write.c.

579
580
581
582
583
584
585

586
587
588
589
590
591
592
...
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
...
698
699
700
701
702
703
704

705
706
707
708
709
710
711
....
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
....
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522










2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537


2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
....
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
    const char *zText = (const char *)sqlite3_value_text(apVal[i]);
    if( zText ){
      int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
      if( rc!=SQLITE_OK ){
        return rc;
      }
    }

  }
  return SQLITE_OK;
}

/*
** This function is called by the xUpdate() method for an INSERT operation.
** The apVal parameter is passed a copy of the apVal argument passed by
................................................................................
}

/*
** The first element in the apVal[] array is assumed to contain the docid
** (an integer) of a row about to be deleted. Remove all terms from the
** full-text index.
*/
static void fts3DeleteTerms(
  int *pRC,               /* Result code */
  Fts3Table *p,           /* The FTS table to delete from */
  sqlite3_value **apVal,  /* apVal[] contains the docid to be deleted */
  u32 *aSz                /* Sizes of deleted document written here */
){
  int rc;
  sqlite3_stmt *pSelect;
................................................................................
        const char *zText = (const char *)sqlite3_column_text(pSelect, i);
        rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
        if( rc!=SQLITE_OK ){
          sqlite3_reset(pSelect);
          *pRC = rc;
          return;
        }

      }
    }
    rc = sqlite3_reset(pSelect);
  }else{
    sqlite3_reset(pSelect);
  }
  *pRC = rc;
................................................................................
        sqlite3_int64 nDoc = 0;
        sqlite3_int64 nByte = 0;
        const char *a = sqlite3_column_blob(pStmt, 0);
        if( a ){
          const char *pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
          a += sqlite3Fts3GetVarint(a, &nDoc);
          while( a<pEnd ){
            sqlite3_int64 nVarint;
            a += sqlite3Fts3GetVarint(a, &nVarint);
            nByte += nVarint;
          }
        }

        pCsr->nRowAvg = (((nByte / nDoc) + pgsz - 1) / pgsz);
      }
      rc = sqlite3_reset(pStmt);
      if( rc!=SQLITE_OK || pCsr->nRowAvg==0 ) return rc;
................................................................................
  sqlite3_bind_int64(pStmt, 1, p->iPrevDocid);
  sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free);
  sqlite3_step(pStmt);
  *pRC = sqlite3_reset(pStmt);
}

/*
** Update the 0 record of the %_stat table so that it holds a blob
** which contains the document count followed by the cumulative
** document sizes for all columns.










*/
static void fts3UpdateDocTotals(
  int *pRC,       /* The result code */
  Fts3Table *p,   /* Table being updated */
  u32 *aSzIns,    /* Size increases */
  u32 *aSzDel,    /* Size decreases */
  int nChng       /* Change in the number of documents */
){
  char *pBlob;             /* Storage for BLOB written into %_stat */
  int nBlob;               /* Size of BLOB written into %_stat */
  u32 *a;                  /* Array of integers that becomes the BLOB */
  sqlite3_stmt *pStmt;     /* Statement for reading and writing */
  int i;                   /* Loop counter */
  int rc;                  /* Result code from subfunctions */



  if( *pRC ) return;
  a = sqlite3_malloc( (sizeof(u32)+10)*(p->nColumn+1) );
  if( a==0 ){
    *pRC = SQLITE_NOMEM;
    return;
  }
  pBlob = (char*)&a[p->nColumn+1];
  rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0);
  if( rc ){
    sqlite3_free(a);
    *pRC = rc;
    return;
  }
  if( sqlite3_step(pStmt)==SQLITE_ROW ){
    fts3DecodeIntArray(p->nColumn+1, a,
         sqlite3_column_blob(pStmt, 0),
         sqlite3_column_bytes(pStmt, 0));
  }else{
    memset(a, 0, sizeof(u32)*(p->nColumn+1) );
  }
  sqlite3_reset(pStmt);
  if( nChng<0 && a[0]<(u32)(-nChng) ){
    a[0] = 0;
  }else{
    a[0] += nChng;
  }
  for(i=0; i<p->nColumn; i++){
    u32 x = a[i+1];
    if( x+aSzIns[i] < aSzDel[i] ){
      x = 0;
    }else{
      x = x + aSzIns[i] - aSzDel[i];
    }
    a[i+1] = x;
  }
  fts3EncodeIntArray(p->nColumn+1, a, pBlob, &nBlob);
  rc = fts3SqlStmt(p, SQL_REPLACE_DOCTOTAL, &pStmt, 0);
  if( rc ){
    sqlite3_free(a);
    *pRC = rc;
    return;
  }
  sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC);
................................................................................
  u32 *aSzIns;                    /* Sizes of inserted documents */
  u32 *aSzDel;                    /* Sizes of deleted documents */
  int nChng = 0;                  /* Net change in number of documents */

  assert( p->pSegments==0 );

  /* Allocate space to hold the change in document sizes */
  aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*p->nColumn*2 );
  if( aSzIns==0 ) return SQLITE_NOMEM;
  aSzDel = &aSzIns[p->nColumn];
  memset(aSzIns, 0, sizeof(aSzIns[0])*p->nColumn*2);

  /* If this is a DELETE or UPDATE operation, remove the old record. */
  if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
    int isEmpty = 0;
    rc = fts3IsEmpty(p, apVal, &isEmpty);
    if( rc==SQLITE_OK ){
      if( isEmpty ){







>







 







|







 







>







 







<
|
<







 







|
|
|
>
>
>
>
>
>
>
>
>
>


|
|
|
|
|








>
>

|




|







|



|







|








|







 







|

|
|







579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
...
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
...
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
....
1031
1032
1033
1034
1035
1036
1037

1038

1039
1040
1041
1042
1043
1044
1045
....
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
....
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
    const char *zText = (const char *)sqlite3_value_text(apVal[i]);
    if( zText ){
      int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
      if( rc!=SQLITE_OK ){
        return rc;
      }
    }
    aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
  }
  return SQLITE_OK;
}

/*
** This function is called by the xUpdate() method for an INSERT operation.
** The apVal parameter is passed a copy of the apVal argument passed by
................................................................................
}

/*
** The first element in the apVal[] array is assumed to contain the docid
** (an integer) of a row about to be deleted. Remove all terms from the
** full-text index.
*/
static void fts3DeleteTerms( 
  int *pRC,               /* Result code */
  Fts3Table *p,           /* The FTS table to delete from */
  sqlite3_value **apVal,  /* apVal[] contains the docid to be deleted */
  u32 *aSz                /* Sizes of deleted document written here */
){
  int rc;
  sqlite3_stmt *pSelect;
................................................................................
        const char *zText = (const char *)sqlite3_column_text(pSelect, i);
        rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
        if( rc!=SQLITE_OK ){
          sqlite3_reset(pSelect);
          *pRC = rc;
          return;
        }
        aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
      }
    }
    rc = sqlite3_reset(pSelect);
  }else{
    sqlite3_reset(pSelect);
  }
  *pRC = rc;
................................................................................
        sqlite3_int64 nDoc = 0;
        sqlite3_int64 nByte = 0;
        const char *a = sqlite3_column_blob(pStmt, 0);
        if( a ){
          const char *pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
          a += sqlite3Fts3GetVarint(a, &nDoc);
          while( a<pEnd ){

            a += sqlite3Fts3GetVarint(a, &nByte);

          }
        }

        pCsr->nRowAvg = (((nByte / nDoc) + pgsz - 1) / pgsz);
      }
      rc = sqlite3_reset(pStmt);
      if( rc!=SQLITE_OK || pCsr->nRowAvg==0 ) return rc;
................................................................................
  sqlite3_bind_int64(pStmt, 1, p->iPrevDocid);
  sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free);
  sqlite3_step(pStmt);
  *pRC = sqlite3_reset(pStmt);
}

/*
** Record 0 of the %_stat table contains a blob consisting of N varints,
** where N is the number of user defined columns in the fts3 table plus
** two. If nCol is the number of user defined columns, then values of the 
** varints are set as follows:
**
**   Varint 0:       Total number of rows in the table.
**
**   Varint 1..nCol: For each column, the total number of tokens stored in
**                   the column for all rows of the table.
**
**   Varint 1+nCol:  The total size, in bytes, of all text values in all
**                   columns of all rows of the table.
**
*/
static void fts3UpdateDocTotals(
  int *pRC,                       /* The result code */
  Fts3Table *p,                   /* Table being updated */
  u32 *aSzIns,                    /* Size increases */
  u32 *aSzDel,                    /* Size decreases */
  int nChng                       /* Change in the number of documents */
){
  char *pBlob;             /* Storage for BLOB written into %_stat */
  int nBlob;               /* Size of BLOB written into %_stat */
  u32 *a;                  /* Array of integers that becomes the BLOB */
  sqlite3_stmt *pStmt;     /* Statement for reading and writing */
  int i;                   /* Loop counter */
  int rc;                  /* Result code from subfunctions */

  const int nStat = p->nColumn+2;

  if( *pRC ) return;
  a = sqlite3_malloc( (sizeof(u32)+10)*nStat );
  if( a==0 ){
    *pRC = SQLITE_NOMEM;
    return;
  }
  pBlob = (char*)&a[nStat];
  rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0);
  if( rc ){
    sqlite3_free(a);
    *pRC = rc;
    return;
  }
  if( sqlite3_step(pStmt)==SQLITE_ROW ){
    fts3DecodeIntArray(nStat, a,
         sqlite3_column_blob(pStmt, 0),
         sqlite3_column_bytes(pStmt, 0));
  }else{
    memset(a, 0, sizeof(u32)*(nStat) );
  }
  sqlite3_reset(pStmt);
  if( nChng<0 && a[0]<(u32)(-nChng) ){
    a[0] = 0;
  }else{
    a[0] += nChng;
  }
  for(i=0; i<p->nColumn+1; i++){
    u32 x = a[i+1];
    if( x+aSzIns[i] < aSzDel[i] ){
      x = 0;
    }else{
      x = x + aSzIns[i] - aSzDel[i];
    }
    a[i+1] = x;
  }
  fts3EncodeIntArray(nStat, a, pBlob, &nBlob);
  rc = fts3SqlStmt(p, SQL_REPLACE_DOCTOTAL, &pStmt, 0);
  if( rc ){
    sqlite3_free(a);
    *pRC = rc;
    return;
  }
  sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC);
................................................................................
  u32 *aSzIns;                    /* Sizes of inserted documents */
  u32 *aSzDel;                    /* Sizes of deleted documents */
  int nChng = 0;                  /* Net change in number of documents */

  assert( p->pSegments==0 );

  /* Allocate space to hold the change in document sizes */
  aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 );
  if( aSzIns==0 ) return SQLITE_NOMEM;
  aSzDel = &aSzIns[p->nColumn+1];
  memset(aSzIns, 0, sizeof(aSzIns[0])*(p->nColumn+1)*2);

  /* If this is a DELETE or UPDATE operation, remove the old record. */
  if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
    int isEmpty = 0;
    rc = fts3IsEmpty(p, apVal, &isEmpty);
    if( rc==SQLITE_OK ){
      if( isEmpty ){

Changes to test/fts3defer.test.

184
185
186
187
188
189
190









191
192
193
194
195
196
197
...
209
210
211
212
213
214
215

216
217
218
219
220
221

222
223
224
225
226
227
228
...
235
236
237
238
239
240
241

242
243
244
245
246
247
248
  "csjqxhgj zm jk jk duszemmzl zk xh zm jk zf"
  "urvysbnykk dzadnqzprr csjqxhgj mjpavjuhw ubwrfqnbjf nkaotm jk jk zm drir"
  "nvfasfh xh igju zm wluvgsw jk zm srwwnezqk ewle ovnq"
  "jk nvfasfh eh ktxdty urvysbnykk vgsld zm jk eh uenvbm"
  "orpfawpx pahlds jk uhzq hi zm zm zf jk dzadnqzprr"
  "srwwnezqk csjqxhgj rbwzuf nvfasfh jcpiwj xldlpy nvfasfh jk vgsld wjybxmieki"
}










#set e [list]
#foreach d $data {set e [concat $e $d]}
#puts [lsort -unique $e]
#exit

set zero_long_doclists {
................................................................................
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }
  }
  3 {
    set dmt_modes {0 1 2}
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }

    execsql $zero_long_doclists
  }
  4 {
    set dmt_modes 0
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }

    execsql "INSERT INTO t1(t1) VALUES('optimize')"
    execsql $zero_long_doclists
  }
} {

  execsql { DROP TABLE IF EXISTS t1 }
  eval $setup
................................................................................

  do_select_test 1.1 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk xnxhf'
  } {13 29 40 47 48 52 63 92}
  do_select_test 1.2 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk eh'
  } {100}

  do_select_test 1.3 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf'
  } {7 70 98}
  do_select_test 1.4 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl jk'
  } {3 5 8 10 13 18 20 23 32 37 41 43 55 60 65 67 72 74 76 81 94 96 97}
  do_select_test 1.5 {







>
>
>
>
>
>
>
>
>







 







>






>







 







>







184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
...
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
...
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
  "csjqxhgj zm jk jk duszemmzl zk xh zm jk zf"
  "urvysbnykk dzadnqzprr csjqxhgj mjpavjuhw ubwrfqnbjf nkaotm jk jk zm drir"
  "nvfasfh xh igju zm wluvgsw jk zm srwwnezqk ewle ovnq"
  "jk nvfasfh eh ktxdty urvysbnykk vgsld zm jk eh uenvbm"
  "orpfawpx pahlds jk uhzq hi zm zm zf jk dzadnqzprr"
  "srwwnezqk csjqxhgj rbwzuf nvfasfh jcpiwj xldlpy nvfasfh jk vgsld wjybxmieki"
}

proc add_empty_records {n} {
  execsql BEGIN
  for {set i 0} {$i < $n} {incr i} {
    execsql { INSERT INTO t1 VALUES('') }
  }
  execsql COMMIT
}


#set e [list]
#foreach d $data {set e [concat $e $d]}
#puts [lsort -unique $e]
#exit

set zero_long_doclists {
................................................................................
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }
  }
  3 {
    set dmt_modes {0 1 2}
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }
    add_empty_records 1000
    execsql $zero_long_doclists
  }
  4 {
    set dmt_modes 0
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }
    add_empty_records 1000
    execsql "INSERT INTO t1(t1) VALUES('optimize')"
    execsql $zero_long_doclists
  }
} {

  execsql { DROP TABLE IF EXISTS t1 }
  eval $setup
................................................................................

  do_select_test 1.1 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk xnxhf'
  } {13 29 40 47 48 52 63 92}
  do_select_test 1.2 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk eh'
  } {100}
if {$tn==3} breakpoint
  do_select_test 1.3 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf'
  } {7 70 98}
  do_select_test 1.4 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl jk'
  } {3 5 8 10 13 18 20 23 32 37 41 43 55 60 65 67 72 74 76 81 94 96 97}
  do_select_test 1.5 {