Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Fix an fts5 problem with initializing the global size record. Also make the checksum routine ignore size records when calculating the index checksum.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | matchinfo
Files: files | file ages | folders
SHA1: e7b52edf68b0e3a2202627496c182aac4c5ffb71
User & Date: dan 2013-01-08 11:45:08.240
Context
2013-01-08
20:35
Add tests and many fixes for snippet implementation. Some tests are still failing. check-in: a257d81d4b user: dan tags: matchinfo
11:45
Fix an fts5 problem to do with initializing the global size record. Also have the checksum routine ignore size records when calculating the index checksum. check-in: e7b52edf68 user: dan tags: matchinfo
2013-01-07
19:52
Add an implementation of snippet() and its associated mi apis to fts5. check-in: 8d94102cd3 user: dan tags: matchinfo
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/fts5.c.
48
49
50
51
52
53
54



55
56
57
58
59
60
61
**
**   Or, if the column contains tokens from multiple streams, the first
**   varint contains a bitmask indicating which of the streams are present
**   (stored as ((bitmask << 1) | 0x01)). Following the bitmask is a
**   varint containing the number of tokens for each stream present, in
**   ascending order of stream number.
**



** Global size record:
**   There is a single "global size" record stored in the database. The
**   database key for this record is a single byte - 0x00.
**
**   The data for this record is a series of varint values. The first 
**   varint is the total number of rows in the table. The subsequent
**   varints make up a "row size" record containing the total number of







>
>
>







48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
**
**   Or, if the column contains tokens from multiple streams, the first
**   varint contains a bitmask indicating which of the streams are present
**   (stored as ((bitmask << 1) | 0x01)). Following the bitmask is a
**   varint containing the number of tokens for each stream present, in
**   ascending order of stream number.
**
**   TODO: The format above is not currently implemented! Instead, there
**   is a simpler place-holder format (which consumes more space).
**
** Global size record:
**   There is a single "global size" record stored in the database. The
**   database key for this record is a single byte - 0x00.
**
**   The data for this record is a series of varint values. The first 
**   varint is the total number of rows in the table. The subsequent
**   varints make up a "row size" record containing the total number of
1344
1345
1346
1347
1348
1349
1350











1351

1352
1353
1354
1355
1356
1357
1358
  KVCursor *pCsr = 0;             /* Cursor used to read global record */
  int rc;

  rc = sqlite4KVStoreOpenCursor(db->aDb[pInfo->iDb].pKV, &pCsr);
  if( rc==SQLITE4_OK ){
    rc = sqlite4KVCursorSeek(pCsr, aKey, nKey, 0);
    if( rc==SQLITE4_NOTFOUND ){











      rc = SQLITE4_CORRUPT_BKPT;

    }else if( rc==SQLITE4_OK ){
      const u8 *aData = 0;
      int nData = 0;
      rc = sqlite4KVCursorData(pCsr, 0, -1, &aData, &nData);
      if( rc==SQLITE4_OK ){
        int iOff = 0;
        int nStream = 0;







>
>
>
>
>
>
>
>
>
>
>
|
>







1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
  KVCursor *pCsr = 0;             /* Cursor used to read global record */
  int rc;

  rc = sqlite4KVStoreOpenCursor(db->aDb[pInfo->iDb].pKV, &pCsr);
  if( rc==SQLITE4_OK ){
    rc = sqlite4KVCursorSeek(pCsr, aKey, nKey, 0);
    if( rc==SQLITE4_NOTFOUND ){
      if( pnRow ){
        int nByte = sizeof(Fts5Size) + sizeof(i64) * pInfo->nCol * nMinStream;
        pSz = sqlite4DbMallocZero(db, nByte);
        if( pSz==0 ){
          rc = SQLITE4_NOMEM;
        }else{
          pSz->aSz = (i64 *)&pSz[1];
          *pnRow = 0;
          rc = SQLITE4_OK;
        }
      }else{
        rc = SQLITE4_CORRUPT_BKPT;
      }
    }else if( rc==SQLITE4_OK ){
      const u8 *aData = 0;
      int nData = 0;
      rc = sqlite4KVCursorData(pCsr, 0, -1, &aData, &nData);
      if( rc==SQLITE4_OK ){
        int iOff = 0;
        int nStream = 0;
1369
1370
1371
1372
1373
1374
1375

1376
1377
1378
1379
1380
1381
1382
        pSz = sqlite4DbMallocZero(db, 
            sizeof(Fts5Size) + sizeof(i64) * pInfo->nCol * nAlloc
        );
        if( pSz==0 ){
          rc = SQLITE4_NOMEM;
        }else{
          int iCol = 0;

          pSz->nCol = pInfo->nCol;
          pSz->nStream = nAlloc;
          while( iOff<nData ){
            int i;
            i64 *aSz = &pSz->aSz[iCol*nAlloc];
            for(i=0; i<nStream; i++){
              iOff += sqlite4GetVarint(&aData[iOff], (u64*)&aSz[i]);







>







1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
        pSz = sqlite4DbMallocZero(db, 
            sizeof(Fts5Size) + sizeof(i64) * pInfo->nCol * nAlloc
        );
        if( pSz==0 ){
          rc = SQLITE4_NOMEM;
        }else{
          int iCol = 0;
          pSz->aSz = (i64 *)&pSz[1];
          pSz->nCol = pInfo->nCol;
          pSz->nStream = nAlloc;
          while( iOff<nData ){
            int i;
            i64 *aSz = &pSz->aSz[iCol*nAlloc];
            for(i=0; i<nStream; i++){
              iOff += sqlite4GetVarint(&aData[iOff], (u64*)&aSz[i]);
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786

1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797

1798
1799
1800
1801
1802
1803
1804
  u8 const *aVal; int nVal;       /* List of token instances */
  u8 const *aToken; int nToken;   /* Token for this entry */
  u8 const *aPk; int nPk;         /* Entry primary key blob */
  InstanceList sList;             /* Used to iterate through pVal */
  int nTnum;
  u32 tnum;


  aKey = (const u8 *)sqlite4_value_blob(pKey);
  nKey = sqlite4_value_bytes(pKey);
  aVal = (const u8 *)sqlite4_value_blob(pVal);
  nVal = sqlite4_value_bytes(pVal);

  /* Find the token and primary key blobs for this entry. */
  nTnum = getVarint32(aKey, tnum);

  aToken = &aKey[nTnum+1];
  nToken = sqlite4Strlen30((const char *)aToken);
  aPk = &aToken[nToken+1];
  nPk = (&aKey[nKey] - aPk);

  fts5InstanceListInit((u8 *)aVal, nVal, &sList);
  while( 0==fts5InstanceListNext(&sList) ){
    i64 v = fts5TermInstanceCksum(
        aPk, nPk, aToken, nToken, sList.iStream, sList.iCol, sList.iOff
    );
    cksum = cksum ^ v;

  }

  *piCksum = cksum;
  return SQLITE4_OK;
}

typedef struct CksumCtx CksumCtx;







<







>
|
|
|
|

|
|
|
|
|
|
>







1788
1789
1790
1791
1792
1793
1794

1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
  u8 const *aVal; int nVal;       /* List of token instances */
  u8 const *aToken; int nToken;   /* Token for this entry */
  u8 const *aPk; int nPk;         /* Entry primary key blob */
  InstanceList sList;             /* Used to iterate through pVal */
  int nTnum;
  u32 tnum;


  aKey = (const u8 *)sqlite4_value_blob(pKey);
  nKey = sqlite4_value_bytes(pKey);
  aVal = (const u8 *)sqlite4_value_blob(pVal);
  nVal = sqlite4_value_bytes(pVal);

  /* Find the token and primary key blobs for this entry. */
  nTnum = getVarint32(aKey, tnum);
  if( aKey[nTnum]!=0 ){
    aToken = &aKey[nTnum+1];
    nToken = sqlite4Strlen30((const char *)aToken);
    aPk = &aToken[nToken+1];
    nPk = (&aKey[nKey] - aPk);

    fts5InstanceListInit((u8 *)aVal, nVal, &sList);
    while( 0==fts5InstanceListNext(&sList) ){
      i64 v = fts5TermInstanceCksum(
          aPk, nPk, aToken, nToken, sList.iStream, sList.iCol, sList.iOff
          );
      cksum = cksum ^ v;
    }
  }

  *piCksum = cksum;
  return SQLITE4_OK;
}

typedef struct CksumCtx CksumCtx;