SQLite

Check-in [941647d121]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: In fts4, store the total number of bytes for all records in the table in the %_stat table.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | experimental
Files: files | file ages | folders
SHA1: 941647d121ac60e2eabc998cfe79b157fb918d7e
User & Date: dan 2010-10-27 10:55:54.000
Context
2010-10-27
16:52
Fix a buffer overread in fts3 that can occur if the database is corrupt. (Closed-Leaf check-in: 84194c4195 user: dan tags: experimental)
10:55
In fts4, store the total number of bytes for all records in the table in the %_stat table. (check-in: 941647d121 user: dan tags: experimental)
2010-10-26
18:42
Structural coverage tests for vdbeblob.c. Including experimental new API sqlite3_blob_reopen(). (check-in: 97c6b2616d user: dan tags: experimental)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3_write.c.
579
580
581
582
583
584
585

586
587
588
589
590
591
592
    const char *zText = (const char *)sqlite3_value_text(apVal[i]);
    if( zText ){
      int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
      if( rc!=SQLITE_OK ){
        return rc;
      }
    }

  }
  return SQLITE_OK;
}

/*
** This function is called by the xUpdate() method for an INSERT operation.
** The apVal parameter is passed a copy of the apVal argument passed by







>







579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
    const char *zText = (const char *)sqlite3_value_text(apVal[i]);
    if( zText ){
      int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
      if( rc!=SQLITE_OK ){
        return rc;
      }
    }
    aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
  }
  return SQLITE_OK;
}

/*
** This function is called by the xUpdate() method for an INSERT operation.
** The apVal parameter is passed a copy of the apVal argument passed by
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
}

/*
** The first element in the apVal[] array is assumed to contain the docid
** (an integer) of a row about to be deleted. Remove all terms from the
** full-text index.
*/
static void fts3DeleteTerms(
  int *pRC,               /* Result code */
  Fts3Table *p,           /* The FTS table to delete from */
  sqlite3_value **apVal,  /* apVal[] contains the docid to be deleted */
  u32 *aSz                /* Sizes of deleted document written here */
){
  int rc;
  sqlite3_stmt *pSelect;







|







677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
}

/*
** The first element in the apVal[] array is assumed to contain the docid
** (an integer) of a row about to be deleted. Remove all terms from the
** full-text index.
*/
static void fts3DeleteTerms( 
  int *pRC,               /* Result code */
  Fts3Table *p,           /* The FTS table to delete from */
  sqlite3_value **apVal,  /* apVal[] contains the docid to be deleted */
  u32 *aSz                /* Sizes of deleted document written here */
){
  int rc;
  sqlite3_stmt *pSelect;
698
699
700
701
702
703
704

705
706
707
708
709
710
711
        const char *zText = (const char *)sqlite3_column_text(pSelect, i);
        rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
        if( rc!=SQLITE_OK ){
          sqlite3_reset(pSelect);
          *pRC = rc;
          return;
        }

      }
    }
    rc = sqlite3_reset(pSelect);
  }else{
    sqlite3_reset(pSelect);
  }
  *pRC = rc;







>







699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
        const char *zText = (const char *)sqlite3_column_text(pSelect, i);
        rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
        if( rc!=SQLITE_OK ){
          sqlite3_reset(pSelect);
          *pRC = rc;
          return;
        }
        aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
      }
    }
    rc = sqlite3_reset(pSelect);
  }else{
    sqlite3_reset(pSelect);
  }
  *pRC = rc;
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
        sqlite3_int64 nDoc = 0;
        sqlite3_int64 nByte = 0;
        const char *a = sqlite3_column_blob(pStmt, 0);
        if( a ){
          const char *pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
          a += sqlite3Fts3GetVarint(a, &nDoc);
          while( a<pEnd ){
            sqlite3_int64 nVarint;
            a += sqlite3Fts3GetVarint(a, &nVarint);
            nByte += nVarint;
          }
        }

        pCsr->nRowAvg = (((nByte / nDoc) + pgsz - 1) / pgsz);
      }
      rc = sqlite3_reset(pStmt);
      if( rc!=SQLITE_OK || pCsr->nRowAvg==0 ) return rc;







<
|
<







1031
1032
1033
1034
1035
1036
1037

1038

1039
1040
1041
1042
1043
1044
1045
        sqlite3_int64 nDoc = 0;
        sqlite3_int64 nByte = 0;
        const char *a = sqlite3_column_blob(pStmt, 0);
        if( a ){
          const char *pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
          a += sqlite3Fts3GetVarint(a, &nDoc);
          while( a<pEnd ){

            a += sqlite3Fts3GetVarint(a, &nByte);

          }
        }

        pCsr->nRowAvg = (((nByte / nDoc) + pgsz - 1) / pgsz);
      }
      rc = sqlite3_reset(pStmt);
      if( rc!=SQLITE_OK || pCsr->nRowAvg==0 ) return rc;
2513
2514
2515
2516
2517
2518
2519
2520


2521




2522




2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537


2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
  sqlite3_bind_int64(pStmt, 1, p->iPrevDocid);
  sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free);
  sqlite3_step(pStmt);
  *pRC = sqlite3_reset(pStmt);
}

/*
** Update the 0 record of the %_stat table so that it holds a blob


** which contains the document count followed by the cumulative




** document sizes for all columns.




*/
static void fts3UpdateDocTotals(
  int *pRC,       /* The result code */
  Fts3Table *p,   /* Table being updated */
  u32 *aSzIns,    /* Size increases */
  u32 *aSzDel,    /* Size decreases */
  int nChng       /* Change in the number of documents */
){
  char *pBlob;             /* Storage for BLOB written into %_stat */
  int nBlob;               /* Size of BLOB written into %_stat */
  u32 *a;                  /* Array of integers that becomes the BLOB */
  sqlite3_stmt *pStmt;     /* Statement for reading and writing */
  int i;                   /* Loop counter */
  int rc;                  /* Result code from subfunctions */



  if( *pRC ) return;
  a = sqlite3_malloc( (sizeof(u32)+10)*(p->nColumn+1) );
  if( a==0 ){
    *pRC = SQLITE_NOMEM;
    return;
  }
  pBlob = (char*)&a[p->nColumn+1];
  rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0);
  if( rc ){
    sqlite3_free(a);
    *pRC = rc;
    return;
  }
  if( sqlite3_step(pStmt)==SQLITE_ROW ){
    fts3DecodeIntArray(p->nColumn+1, a,
         sqlite3_column_blob(pStmt, 0),
         sqlite3_column_bytes(pStmt, 0));
  }else{
    memset(a, 0, sizeof(u32)*(p->nColumn+1) );
  }
  sqlite3_reset(pStmt);
  if( nChng<0 && a[0]<(u32)(-nChng) ){
    a[0] = 0;
  }else{
    a[0] += nChng;
  }
  for(i=0; i<p->nColumn; i++){
    u32 x = a[i+1];
    if( x+aSzIns[i] < aSzDel[i] ){
      x = 0;
    }else{
      x = x + aSzIns[i] - aSzDel[i];
    }
    a[i+1] = x;
  }
  fts3EncodeIntArray(p->nColumn+1, a, pBlob, &nBlob);
  rc = fts3SqlStmt(p, SQL_REPLACE_DOCTOTAL, &pStmt, 0);
  if( rc ){
    sqlite3_free(a);
    *pRC = rc;
    return;
  }
  sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC);







|
>
>
|
>
>
>
>
|
>
>
>
>


|
|
|
|
|








>
>

|




|







|



|







|








|







2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
  sqlite3_bind_int64(pStmt, 1, p->iPrevDocid);
  sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free);
  sqlite3_step(pStmt);
  *pRC = sqlite3_reset(pStmt);
}

/*
** Record 0 of the %_stat table contains a blob consisting of N varints,
** where N is the number of user defined columns in the fts3 table plus
** two. If nCol is the number of user defined columns, then the values of
** the varints are set as follows:
**
**   Varint 0:       Total number of rows in the table.
**
**   Varint 1..nCol: For each column, the total number of tokens stored in
**                   the column for all rows of the table.
**
**   Varint 1+nCol:  The total size, in bytes, of all text values in all
**                   columns of all rows of the table.
**
*/
static void fts3UpdateDocTotals(
  int *pRC,                       /* The result code */
  Fts3Table *p,                   /* Table being updated */
  u32 *aSzIns,                    /* Size increases */
  u32 *aSzDel,                    /* Size decreases */
  int nChng                       /* Change in the number of documents */
){
  char *pBlob;             /* Storage for BLOB written into %_stat */
  int nBlob;               /* Size of BLOB written into %_stat */
  u32 *a;                  /* Array of integers that becomes the BLOB */
  sqlite3_stmt *pStmt;     /* Statement for reading and writing */
  int i;                   /* Loop counter */
  int rc;                  /* Result code from subfunctions */

  const int nStat = p->nColumn+2;

  if( *pRC ) return;
  a = sqlite3_malloc( (sizeof(u32)+10)*nStat );
  if( a==0 ){
    *pRC = SQLITE_NOMEM;
    return;
  }
  pBlob = (char*)&a[nStat];
  rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0);
  if( rc ){
    sqlite3_free(a);
    *pRC = rc;
    return;
  }
  if( sqlite3_step(pStmt)==SQLITE_ROW ){
    fts3DecodeIntArray(nStat, a,
         sqlite3_column_blob(pStmt, 0),
         sqlite3_column_bytes(pStmt, 0));
  }else{
    memset(a, 0, sizeof(u32)*(nStat) );
  }
  sqlite3_reset(pStmt);
  if( nChng<0 && a[0]<(u32)(-nChng) ){
    a[0] = 0;
  }else{
    a[0] += nChng;
  }
  for(i=0; i<p->nColumn+1; i++){
    u32 x = a[i+1];
    if( x+aSzIns[i] < aSzDel[i] ){
      x = 0;
    }else{
      x = x + aSzIns[i] - aSzDel[i];
    }
    a[i+1] = x;
  }
  fts3EncodeIntArray(nStat, a, pBlob, &nBlob);
  rc = fts3SqlStmt(p, SQL_REPLACE_DOCTOTAL, &pStmt, 0);
  if( rc ){
    sqlite3_free(a);
    *pRC = rc;
    return;
  }
  sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC);
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
  u32 *aSzIns;                    /* Sizes of inserted documents */
  u32 *aSzDel;                    /* Sizes of deleted documents */
  int nChng = 0;                  /* Net change in number of documents */

  assert( p->pSegments==0 );

  /* Allocate space to hold the change in document sizes */
  aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*p->nColumn*2 );
  if( aSzIns==0 ) return SQLITE_NOMEM;
  aSzDel = &aSzIns[p->nColumn];
  memset(aSzIns, 0, sizeof(aSzIns[0])*p->nColumn*2);

  /* If this is a DELETE or UPDATE operation, remove the old record. */
  if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
    int isEmpty = 0;
    rc = fts3IsEmpty(p, apVal, &isEmpty);
    if( rc==SQLITE_OK ){
      if( isEmpty ){







|

|
|







2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
  u32 *aSzIns;                    /* Sizes of inserted documents */
  u32 *aSzDel;                    /* Sizes of deleted documents */
  int nChng = 0;                  /* Net change in number of documents */

  assert( p->pSegments==0 );

  /* Allocate space to hold the change in document sizes */
  aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 );
  if( aSzIns==0 ) return SQLITE_NOMEM;
  aSzDel = &aSzIns[p->nColumn+1];
  memset(aSzIns, 0, sizeof(aSzIns[0])*(p->nColumn+1)*2);

  /* If this is a DELETE or UPDATE operation, remove the old record. */
  if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
    int isEmpty = 0;
    rc = fts3IsEmpty(p, apVal, &isEmpty);
    if( rc==SQLITE_OK ){
      if( isEmpty ){
Changes to test/fts3defer.test.
184
185
186
187
188
189
190









191
192
193
194
195
196
197
  "csjqxhgj zm jk jk duszemmzl zk xh zm jk zf"
  "urvysbnykk dzadnqzprr csjqxhgj mjpavjuhw ubwrfqnbjf nkaotm jk jk zm drir"
  "nvfasfh xh igju zm wluvgsw jk zm srwwnezqk ewle ovnq"
  "jk nvfasfh eh ktxdty urvysbnykk vgsld zm jk eh uenvbm"
  "orpfawpx pahlds jk uhzq hi zm zm zf jk dzadnqzprr"
  "srwwnezqk csjqxhgj rbwzuf nvfasfh jcpiwj xldlpy nvfasfh jk vgsld wjybxmieki"
}










#set e [list]
#foreach d $data {set e [concat $e $d]}
#puts [lsort -unique $e]
#exit

set zero_long_doclists {







>
>
>
>
>
>
>
>
>







184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
  "csjqxhgj zm jk jk duszemmzl zk xh zm jk zf"
  "urvysbnykk dzadnqzprr csjqxhgj mjpavjuhw ubwrfqnbjf nkaotm jk jk zm drir"
  "nvfasfh xh igju zm wluvgsw jk zm srwwnezqk ewle ovnq"
  "jk nvfasfh eh ktxdty urvysbnykk vgsld zm jk eh uenvbm"
  "orpfawpx pahlds jk uhzq hi zm zm zf jk dzadnqzprr"
  "srwwnezqk csjqxhgj rbwzuf nvfasfh jcpiwj xldlpy nvfasfh jk vgsld wjybxmieki"
}

# Insert $n rows into table t1, each holding a single empty-string value.
# All of the inserts are issued between one BEGIN and one COMMIT.
proc add_empty_records {n} {
  execsql BEGIN
  set nInserted 0
  while {$nInserted < $n} {
    execsql { INSERT INTO t1 VALUES('') }
    incr nInserted
  }
  execsql COMMIT
}


#set e [list]
#foreach d $data {set e [concat $e $d]}
#puts [lsort -unique $e]
#exit

set zero_long_doclists {
209
210
211
212
213
214
215

216
217
218
219
220
221

222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241

242
243
244
245
246
247
248
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }
  }
  3 {
    set dmt_modes {0 1 2}
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }

    execsql $zero_long_doclists
  }
  4 {
    set dmt_modes 0
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }

    execsql "INSERT INTO t1(t1) VALUES('optimize')"
    execsql $zero_long_doclists
  }
} {

  execsql { DROP TABLE IF EXISTS t1 }
  eval $setup
  set ::testprefix fts3defer-2.$tn
  set DO_MALLOC_TEST 0

  do_execsql_test 0 { 
    SELECT count(*) FROM t1_segments WHERE length(block)>10000 
  } {2}

  do_select_test 1.1 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk xnxhf'
  } {13 29 40 47 48 52 63 92}
  do_select_test 1.2 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk eh'
  } {100}

  do_select_test 1.3 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf'
  } {7 70 98}
  do_select_test 1.4 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl jk'
  } {3 5 8 10 13 18 20 23 32 37 41 43 55 60 65 67 72 74 76 81 94 96 97}
  do_select_test 1.5 {







>






>




















>







218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }
  }
  3 {
    set dmt_modes {0 1 2}
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }
    add_empty_records 1000
    execsql $zero_long_doclists
  }
  4 {
    set dmt_modes 0
    execsql { CREATE VIRTUAL TABLE t1 USING FTS4 }
    foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } }
    add_empty_records 1000
    execsql "INSERT INTO t1(t1) VALUES('optimize')"
    execsql $zero_long_doclists
  }
} {

  execsql { DROP TABLE IF EXISTS t1 }
  eval $setup
  set ::testprefix fts3defer-2.$tn
  set DO_MALLOC_TEST 0

  do_execsql_test 0 { 
    SELECT count(*) FROM t1_segments WHERE length(block)>10000 
  } {2}

  do_select_test 1.1 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk xnxhf'
  } {13 29 40 47 48 52 63 92}
  do_select_test 1.2 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk eh'
  } {100}
if {$tn==3} breakpoint
  do_select_test 1.3 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf'
  } {7 70 98}
  do_select_test 1.4 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl jk'
  } {3 5 8 10 13 18 20 23 32 37 41 43 55 60 65 67 72 74 76 81 94 96 97}
  do_select_test 1.5 {