/ Check-in [29476da3]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Rationalize some code in fts3 used by optimize operations, queries of the pending-terms hash table and segment merges. Add the "INSERT INTO tbl(tbl) VALUES('optimize')" syntax.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 29476da353df4c67fe744c1c5f466ba5b9c1a54b
User & Date: dan 2009-12-11 12:29:05
Context
2009-12-11
16:03
Change the fts3 test interface used to configure the advisory node size parameter. check-in: 87fc0ce1 user: dan tags: trunk
12:29
Rationalize some code in fts3 used by optimize operations, queries of the pending-terms hash table and segment merges. Add the "INSERT INTO tbl(tbl) VALUES('optimize')" syntax. check-in: 29476da3 user: dan tags: trunk
07:07
Add comment to fts3rnd.test to explain how the test works. check-in: 6b740c7c user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3_write.c.

741
742
743
744
745
746
747
748
749
750

751
752
753
754
755
756
757
...
991
992
993
994
995
996
997

998
999
1000
1001
1002
1003

1004

1005


1006
1007
1008
1009
1010
1011

1012
1013
1014
1015
1016
1017
1018
....
2042
2043
2044
2045
2046
2047
2048

2049
2050
2051
2052
2053
2054
2055
2056




2057
2058

2059
2060
2061
2062
2063
2064
2065
2066
....
2092
2093
2094
2095
2096
2097
2098



2099
2100
2101
2102
2103
2104
2105
....
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135

2136

2137

2138
2139
2140
2141
2142
2143
2144
2145
2146
2147



2148
2149
2150
2151
2152

2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169



2170



2171

2172
2173
2174
2175
2176
2177
2178
2179




2180
2181
2182
2183
2184
2185


2186
2187





























2188
2189
2190
2191
2192
2193
2194
....
2198
2199
2200
2201
2202
2203
2204

2205
2206
2207
2208
2209
2210
2211
....
2222
2223
2224
2225
2226
2227
2228


2229
2230
2231
2232
2233
2234
2235
....
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259



2260
2261
2262
2263
2264
2265
2266
2267
2268
      Fts3HashElem *pElem = *(pReader->ppNextElem);
      if( pElem==0 ){
        pReader->aNode = 0;
      }else{
        PendingList *pList = (PendingList *)fts3HashData(pElem);
        pReader->zTerm = (char *)fts3HashKey(pElem);
        pReader->nTerm = fts3HashKeysize(pElem);
        pReader->nNode = pReader->nDoclist = pList->nData;
        pReader->aNode = pReader->aDoclist = pList->aData;
        pReader->ppNextElem++;

      }
      return SQLITE_OK;
    }
    if( !pReader->pStmt ){
      pReader->aNode = 0;
      return SQLITE_OK;
    }
................................................................................
){
  Fts3SegReader *pReader = 0;     /* Fts3SegReader object to return */
  Fts3HashElem **aElem = 0;       /* Array of term hash entries to scan */
  int nElem = 0;                  /* Size of array at aElem */
  int rc = SQLITE_OK;             /* Return Code */

  if( isPrefix ){

    Fts3HashElem *pE = 0;         /* Iterator variable */

    for(pE=fts3HashFirst(&p->pendingTerms); pE; pE=fts3HashNext(pE)){
      char *zKey = (char *)fts3HashKey(pE);
      int nKey = fts3HashKeysize(pE);
      if( nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm) ){

        int nByte = (1+nElem * sizeof(Fts3HashElem *));

        Fts3HashElem **aElem2 = (Fts3HashElem **)sqlite3_realloc(aElem, nByte);


        if( !aElem2 ){
          rc = SQLITE_NOMEM;
          nElem = 0;
          break;
        }
        aElem = aElem2;

        aElem[nElem++] = pE;
      }
    }

    /* If more than one term matches the prefix, sort the Fts3HashElem
    ** objects in term order using qsort(). This uses the same comparison
    ** callback as is used when flushing terms to disk.
................................................................................
  int rc;                         /* Return code */
  int iIdx;                       /* Index of new segment */
  int iNewLevel;                  /* Level to create new segment at */
  sqlite3_stmt *pStmt;
  SegmentWriter *pWriter = 0;
  int nSegment = 0;               /* Number of segments being merged */
  Fts3SegReader **apSegment = 0;  /* Array of Segment iterators */

  Fts3SegFilter filter;           /* Segment term filter condition */

  if( iLevel<0 ){
    /* This call is to merge all segments in the database to a single
    ** segment. The level of the new segment is equal to the the numerically 
    ** greatest segment level currently present in the database. The index
    ** of the new segment is always 0.
    */




    iIdx = 0;
    rc = fts3SegmentCountMax(p, &nSegment, &iNewLevel);

    if( nSegment==1 ){
      return SQLITE_DONE;
    }
  }else{
    /* This call is to merge all segments at level iLevel. Find the next
    ** available segment index at level iLevel+1. The call to
    ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to 
    ** a single iLevel+2 segment if necessary.
................................................................................
  for(i=0; SQLITE_ROW==(sqlite3_step(pStmt)); i++){
    rc = fts3SegReaderNew(p, pStmt, i, &apSegment[i]);
    if( rc!=SQLITE_OK ){
      goto finished;
    }
  }
  rc = sqlite3_reset(pStmt);



  pStmt = 0;
  if( rc!=SQLITE_OK ) goto finished;

  memset(&filter, 0, sizeof(Fts3SegFilter));
  filter.flags = FTS3_SEGMENT_REQUIRE_POS;
  filter.flags |= (iLevel<0 ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
  rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment,
................................................................................
}


/* 
** Flush the contents of pendingTerms to a level 0 segment.
*/
int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
  Fts3HashElem *pElem;
  int idx, rc, i;
  Fts3HashElem **apElem;          /* Array of pointers to hash elements */
  int nElem;                      /* Number of terms in new segment */

  SegmentWriter *pWriter = 0;     /* Used to write the segment */



  /* Find the number of terms that will make up the new segment. If there
  ** are no terms, return early (do not bother to write an empty segment).
  */
  nElem = fts3HashCount(&p->pendingTerms);
  if( nElem==0 ){
    assert( p->nPendingData==0 );
    return SQLITE_OK;
  }

  /* Determine the next index at level 0, merging as necessary. */



  rc = fts3AllocateSegdirIdx(p, 0, &idx);
  if( rc!=SQLITE_OK ){
    return rc;
  } 


  apElem = sqlite3_malloc(nElem*sizeof(Fts3HashElem *));
  if( !apElem ){
    return SQLITE_NOMEM;
  }

  i = 0;
  for(pElem=fts3HashFirst(&p->pendingTerms); pElem; pElem=fts3HashNext(pElem)){
    apElem[i++] = pElem;
  }
  assert( i==nElem );

  /* TODO(shess) Should we allow user-defined collation sequences,
  ** here?  I think we only need that once we support prefix searches.
  ** Also, should we be using qsort()?
  */
  if( nElem>1 ){
    qsort(apElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm);



  }






  /* Write the segment tree into the database. */
  for(i=0; rc==SQLITE_OK && i<nElem; i++){
    const char *z = fts3HashKey(apElem[i]);
    int n = fts3HashKeysize(apElem[i]);
    PendingList *pList = fts3HashData(apElem[i]);
    rc = fts3SegWriterAdd(p, &pWriter, 0, z, n, pList->aData, pList->nData+1);
  }




  if( rc==SQLITE_OK ){
    rc = fts3SegWriterFlush(p, pWriter, 0, idx);
  }

  /* Free all allocated resources before returning */
  fts3SegWriterFree(pWriter);


  sqlite3_free(apElem);
  sqlite3Fts3PendingTermsClear(p);





























  return rc;
}

/*
** This function does the work for the xUpdate method of FTS3 virtual
** tables.
*/
................................................................................
  sqlite3_value **apVal,          /* Array of arguments */
  sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
){
  Fts3Table *p = (Fts3Table *)pVtab;
  int rc = SQLITE_OK;             /* Return Code */
  int isRemove = 0;               /* True for an UPDATE or DELETE */
  sqlite3_int64 iRemove = 0;      /* Rowid removed by UPDATE or DELETE */


  /* If this is a DELETE or UPDATE operation, remove the old record. */
  if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
    int isEmpty;
    rc = fts3IsEmpty(p, apVal, &isEmpty);
    if( rc==SQLITE_OK ){
      if( isEmpty ){
................................................................................
          rc = fts3DeleteTerms(p, apVal);
          if( rc==SQLITE_OK ){
            rc = fts3SqlExec(p, SQL_DELETE_CONTENT, apVal);
          }
        }
      }
    }


  }
  
  /* If this is an INSERT or UPDATE operation, insert the new record. */
  if( nArg>1 && rc==SQLITE_OK ){
    rc = fts3InsertData(p, apVal, pRowid);
    if( rc==SQLITE_OK && (!isRemove || *pRowid!=iRemove) ){
      rc = fts3PendingTermsDocid(p, *pRowid);
................................................................................
** merge all segments in the database (including the new segment, if 
** there was any data to flush) into a single segment. 
*/
int sqlite3Fts3Optimize(Fts3Table *p){
  int rc;
  rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0);
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts3PendingTermsFlush(p);
    if( rc==SQLITE_OK ){
      rc = fts3SegmentMerge(p, -1);
    }
    if( rc==SQLITE_OK ){
      rc = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);



    }else{
      sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0);
      sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
    }
  }
  return rc;
}

#endif







|


>







 







>





|
>
|
>
|
>
>
|
|
|
|
|
|
>







 







>








>
>
>
>


>
|







 







>
>
>







 







<
<
<
|
>

>

>
|
|

|
|
<
|


|
>
>
>

<
<
|
<
>
|
|
|
<
<
<
<
<
<
<
<
<
<
<

<
<
>
>
>
|
>
>
>
|
>

<
<
<
<
<
<
<
>
>
>
>



<
<

>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>







 







>
>







 







<
<
|
<


>
>
>









741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
...
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
....
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
....
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
....
2141
2142
2143
2144
2145
2146
2147



2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158

2159
2160
2161
2162
2163
2164
2165
2166


2167

2168
2169
2170
2171











2172


2173
2174
2175
2176
2177
2178
2179
2180
2181
2182







2183
2184
2185
2186
2187
2188
2189


2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
....
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
....
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
....
2286
2287
2288
2289
2290
2291
2292


2293

2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
      Fts3HashElem *pElem = *(pReader->ppNextElem);
      if( pElem==0 ){
        pReader->aNode = 0;
      }else{
        PendingList *pList = (PendingList *)fts3HashData(pElem);
        pReader->zTerm = (char *)fts3HashKey(pElem);
        pReader->nTerm = fts3HashKeysize(pElem);
        pReader->nNode = pReader->nDoclist = pList->nData + 1;
        pReader->aNode = pReader->aDoclist = pList->aData;
        pReader->ppNextElem++;
        assert( pReader->aNode );
      }
      return SQLITE_OK;
    }
    if( !pReader->pStmt ){
      pReader->aNode = 0;
      return SQLITE_OK;
    }
................................................................................
){
  Fts3SegReader *pReader = 0;     /* Fts3SegReader object to return */
  Fts3HashElem **aElem = 0;       /* Array of term hash entries to scan */
  int nElem = 0;                  /* Size of array at aElem */
  int rc = SQLITE_OK;             /* Return Code */

  if( isPrefix ){
    int nAlloc = 0;               /* Size of allocated array at aElem */
    Fts3HashElem *pE = 0;         /* Iterator variable */

    for(pE=fts3HashFirst(&p->pendingTerms); pE; pE=fts3HashNext(pE)){
      char *zKey = (char *)fts3HashKey(pE);
      int nKey = fts3HashKeysize(pE);
      if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){
        if( nElem==nAlloc ){
          Fts3HashElem **aElem2;
          nAlloc += 16;
          aElem2 = (Fts3HashElem **)sqlite3_realloc(
              aElem, nAlloc*sizeof(Fts3HashElem *)
          );
          if( !aElem2 ){
            rc = SQLITE_NOMEM;
            nElem = 0;
            break;
          }
          aElem = aElem2;
        }
        aElem[nElem++] = pE;
      }
    }

    /* If more than one term matches the prefix, sort the Fts3HashElem
    ** objects in term order using qsort(). This uses the same comparison
    ** callback as is used when flushing terms to disk.
................................................................................
  int rc;                         /* Return code */
  int iIdx;                       /* Index of new segment */
  int iNewLevel;                  /* Level to create new segment at */
  sqlite3_stmt *pStmt;
  SegmentWriter *pWriter = 0;
  int nSegment = 0;               /* Number of segments being merged */
  Fts3SegReader **apSegment = 0;  /* Array of Segment iterators */
  Fts3SegReader *pPending = 0;    /* Iterator for pending-terms */
  Fts3SegFilter filter;           /* Segment term filter condition */

  if( iLevel<0 ){
    /* This call is to merge all segments in the database to a single
    ** segment. The level of the new segment is equal to the the numerically 
    ** greatest segment level currently present in the database. The index
    ** of the new segment is always 0.
    */
    rc = sqlite3Fts3SegReaderPending(p, 0, 0, 1, &pPending);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    iIdx = 0;
    rc = fts3SegmentCountMax(p, &nSegment, &iNewLevel);
    nSegment += (pPending!=0);
    if( nSegment<=1 ){
      return SQLITE_DONE;
    }
  }else{
    /* This call is to merge all segments at level iLevel. Find the next
    ** available segment index at level iLevel+1. The call to
    ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to 
    ** a single iLevel+2 segment if necessary.
................................................................................
  for(i=0; SQLITE_ROW==(sqlite3_step(pStmt)); i++){
    rc = fts3SegReaderNew(p, pStmt, i, &apSegment[i]);
    if( rc!=SQLITE_OK ){
      goto finished;
    }
  }
  rc = sqlite3_reset(pStmt);
  if( pPending ){
    apSegment[i] = pPending;
  }
  pStmt = 0;
  if( rc!=SQLITE_OK ) goto finished;

  memset(&filter, 0, sizeof(Fts3SegFilter));
  filter.flags = FTS3_SEGMENT_REQUIRE_POS;
  filter.flags |= (iLevel<0 ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
  rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment,
................................................................................
}


/* 
** Flush the contents of pendingTerms to a level 0 segment.
*/
int sqlite3Fts3PendingTermsFlush(Fts3Table *p){



  int rc;                         /* Return Code */
  int idx;                        /* Index of new segment created */
  SegmentWriter *pWriter = 0;     /* Used to write the segment */
  Fts3SegReader *pReader = 0;     /* Used to iterate through the hash table */

  /* Allocate a SegReader object to iterate through the contents of the
  ** pending-terms table. If an error occurs, or if there are no terms
  ** in the pending-terms table, return immediately.
  */
  rc = sqlite3Fts3SegReaderPending(p, 0, 0, 1, &pReader);
  if( rc!=SQLITE_OK || pReader==0 ){

    return rc;
  }

  /* Determine the next index at level 0. If level 0 is already full, this
  ** call may merge all existing level 0 segments into a single level 1
  ** segment.
  */
  rc = fts3AllocateSegdirIdx(p, 0, &idx);




  /* If no errors have occured, iterate through the contents of the 
  ** pending-terms hash table using the Fts3SegReader iterator. The callback
  ** writes each term (along with its doclist) to the database via the
  ** SegmentWriter handle pWriter.











  */


  if( rc==SQLITE_OK ){
    void *c = (void *)&pWriter;   /* SegReaderIterate() callback context */
    Fts3SegFilter f;              /* SegReaderIterate() parameters */

    memset(&f, 0, sizeof(Fts3SegFilter));
    f.flags = FTS3_SEGMENT_REQUIRE_POS;
    rc = sqlite3Fts3SegReaderIterate(p, &pReader, 1, &f, fts3MergeCallback, c);
  }
  assert( pWriter || rc!=SQLITE_OK );








  /* If no errors have occured, flush the SegmentWriter object to the
  ** database. Then delete the SegmentWriter and Fts3SegReader objects
  ** allocated by this function.
  */
  if( rc==SQLITE_OK ){
    rc = fts3SegWriterFlush(p, pWriter, 0, idx);
  }


  fts3SegWriterFree(pWriter);
  sqlite3Fts3SegReaderFree(p, pReader);

  if( rc==SQLITE_OK ){
    sqlite3Fts3PendingTermsClear(p);
  }
  return rc;
}

/*
** Handle a 'special' INSERT of the form:
**
**   "INSERT INTO tbl(tbl) VALUES(<expr>)"
**
** Argument pVal contains the result of <expr>. Currently the only 
** meaningful value to insert is the text 'optimize'.
*/
static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
  int rc;                         /* Return Code */
  const char *zVal = sqlite3_value_text(pVal);
  int nVal = sqlite3_value_bytes(pVal);

  if( !zVal ){
    return SQLITE_NOMEM;
  }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){
    rc = fts3SegmentMerge(p, -1);
    if( rc==SQLITE_DONE || rc==SQLITE_OK ){
      rc = SQLITE_OK;
      sqlite3Fts3PendingTermsClear(p);
    }
  }else{
    rc = SQLITE_ERROR;
  }

  return rc;
}

/*
** This function does the work for the xUpdate method of FTS3 virtual
** tables.
*/
................................................................................
  sqlite3_value **apVal,          /* Array of arguments */
  sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
){
  Fts3Table *p = (Fts3Table *)pVtab;
  int rc = SQLITE_OK;             /* Return Code */
  int isRemove = 0;               /* True for an UPDATE or DELETE */
  sqlite3_int64 iRemove = 0;      /* Rowid removed by UPDATE or DELETE */


  /* If this is a DELETE or UPDATE operation, remove the old record. */
  if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
    int isEmpty;
    rc = fts3IsEmpty(p, apVal, &isEmpty);
    if( rc==SQLITE_OK ){
      if( isEmpty ){
................................................................................
          rc = fts3DeleteTerms(p, apVal);
          if( rc==SQLITE_OK ){
            rc = fts3SqlExec(p, SQL_DELETE_CONTENT, apVal);
          }
        }
      }
    }
  }else if( sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL ){
    return fts3SpecialInsert(p, apVal[p->nColumn+2]);
  }
  
  /* If this is an INSERT or UPDATE operation, insert the new record. */
  if( nArg>1 && rc==SQLITE_OK ){
    rc = fts3InsertData(p, apVal, pRowid);
    if( rc==SQLITE_OK && (!isRemove || *pRowid!=iRemove) ){
      rc = fts3PendingTermsDocid(p, *pRowid);
................................................................................
** merge all segments in the database (including the new segment, if 
** there was any data to flush) into a single segment. 
*/
int sqlite3Fts3Optimize(Fts3Table *p){
  int rc;
  rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0);
  if( rc==SQLITE_OK ){


    rc = fts3SegmentMerge(p, -1);

    if( rc==SQLITE_OK ){
      rc = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
      if( rc==SQLITE_OK ){
        sqlite3Fts3PendingTermsClear(p);
      }
    }else{
      sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0);
      sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
    }
  }
  return rc;
}

#endif

Changes to test/e_fts3.test.

180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
  INSERT INTO docs VALUES('"which is for Solomon," meaning that');
}
write_test 1.2.2.4 docs_content {
  INSERT INTO docs VALUES('the book is dedicated to Solomon.');
}
read_test  1.2.2.5 { SELECT count(*) FROM docs_segdir } {3}
write_test 1.2.2.6 docs_segdir {
  SELECT * FROM (SELECT optimize(docs) FROM docs LIMIT 1) WHERE 0;
}
read_test  1.2.2.7 { SELECT count(*) FROM docs_segdir } {1}
ddl_test   1.2.2.8 { DROP TABLE docs }

##########################################################################
# Test the examples in section 1.3 (querying FTS3 tables)
#







|







180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
  INSERT INTO docs VALUES('"which is for Solomon," meaning that');
}
write_test 1.2.2.4 docs_content {
  INSERT INTO docs VALUES('the book is dedicated to Solomon.');
}
read_test  1.2.2.5 { SELECT count(*) FROM docs_segdir } {3}
write_test 1.2.2.6 docs_segdir {
  INSERT INTO docs(docs) VALUES('optimize');
}
read_test  1.2.2.7 { SELECT count(*) FROM docs_segdir } {1}
ddl_test   1.2.2.8 { DROP TABLE docs }

##########################################################################
# Test the examples in section 1.3 (querying FTS3 tables)
#