Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Many fts5 related fixes. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
e21b7b67b5fed3bf2b86d872b50ae7f9 |
User & Date: | dan 2013-01-12 15:50:10.568 |
Context
2013-01-13
| ||
05:30 | Avoid using keyword "near" as a variable name in fts5.c. check-in: 278cfaeb70 user: dan tags: trunk | |
2013-01-12
| ||
15:50 | Many fts5 related fixes. check-in: e21b7b67b5 user: dan tags: trunk | |
15:13 | Get the amalgamation build working again. check-in: 0078080de5 user: drh tags: trunk | |
Changes
Changes to src/build.c.
︙ | ︙ | |||
2285 2286 2287 2288 2289 2290 2291 | } sqlite4OpenIndex(pParse, iIdx, iDb, pIdx, OP_OpenWrite); if( bCreate ) sqlite4VdbeChangeP5(v, 1); /* Loop through the contents of the PK index. At each row, insert the ** corresponding entry into the auxiliary index. */ addr1 = sqlite4VdbeAddOp2(v, OP_Rewind, iTab, 0); | > > > > > > > > > > > > > > | | | | | | | | | | | | > > | 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 | } sqlite4OpenIndex(pParse, iIdx, iDb, pIdx, OP_OpenWrite); if( bCreate ) sqlite4VdbeChangeP5(v, 1); /* Loop through the contents of the PK index. At each row, insert the ** corresponding entry into the auxiliary index. */ addr1 = sqlite4VdbeAddOp2(v, OP_Rewind, iTab, 0); if( pIdx->eIndexType==SQLITE4_INDEX_FTS5 ){ int regData; int i; regKey = sqlite4GetTempRange(pParse, pTab->nCol+1); regData = regKey+1; sqlite4VdbeAddOp2(v, OP_RowKey, iTab, regKey); for(i=0; i<pTab->nCol; i++){ sqlite4VdbeAddOp3(v, OP_Column, iTab, i, regData+i); } sqlite4Fts5CodeUpdate(pParse, pIdx, pParse->iNewidxReg, regKey, regData, 0); }else{ sqlite4GetTempRange(pParse,2); regKey = sqlite4GetTempReg(pParse); sqlite4EncodeIndexKey(pParse, pPk, iTab, pIdx, iIdx, 0, regKey); if( pIdx->onError!=OE_None ){ const char *zErr = "indexed columns are not unique"; int addrTest; addrTest = sqlite4VdbeAddOp4Int(v, OP_IsUnique, iIdx, 0, regKey, 0); sqlite4HaltConstraint(pParse, OE_Abort, (char *)zErr, P4_STATIC); sqlite4VdbeJumpHere(v, addrTest); } sqlite4VdbeAddOp3(v, OP_IdxInsert, iIdx, 0, regKey); } sqlite4VdbeAddOp2(v, OP_Next, iTab, addr1+1); sqlite4VdbeJumpHere(v, addr1); sqlite4ReleaseTempReg(pParse, regKey); sqlite4VdbeAddOp1(v, OP_Close, iTab); sqlite4VdbeAddOp1(v, OP_Close, iIdx); } |
︙ | ︙ | |||
2557 2558 2559 2560 2561 2562 2563 | addIndexToHash(db, pIdx); pIdx = 0; }else{ createIndexWriteSchema(pParse, pIdx, pIdxName, pEnd); } } | | | 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 | addIndexToHash(db, pIdx); pIdx = 0; }else{ createIndexWriteSchema(pParse, pIdx, pIdxName, pEnd); } } if( pIdx ) freeIndex(db, pIdx); sqlite4DbFree(db, zIdx); } sqlite4ExprListDelete(db, pList); sqlite4SrcListDelete(db, p->pTblName); } |
︙ | ︙ |
Changes to src/delete.c.
︙ | ︙ | |||
599 600 601 602 603 604 605 | int iCol; int iReg = pParse->nMem+1; pParse->nMem += (1 + pTab->nCol); for(iCol=0; iCol<pTab->nCol; iCol++){ sqlite4VdbeAddOp3(v, OP_Column, iPkCsr, iCol, iReg+iCol); } sqlite4VdbeAddOp2(v, OP_RowKey, iPkCsr, iReg+pTab->nCol); | | | 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 | int iCol; int iReg = pParse->nMem+1; pParse->nMem += (1 + pTab->nCol); for(iCol=0; iCol<pTab->nCol; iCol++){ sqlite4VdbeAddOp3(v, OP_Column, iPkCsr, iCol, iReg+iCol); } sqlite4VdbeAddOp2(v, OP_RowKey, iPkCsr, iReg+pTab->nCol); sqlite4Fts5CodeUpdate(pParse, pIdx, 0, iReg+pTab->nCol, iReg, 1); }else if( pIdx!=pPk && (aRegIdx==0 || aRegIdx[i]>0) ){ int addrNotFound; sqlite4EncodeIndexKey(pParse, pPk, baseCur+iPk,pIdx,baseCur+i,0,regKey); addrNotFound = sqlite4VdbeAddOp4(v, OP_NotFound, baseCur+i, 0, regKey, 0, P4_INT32 ); sqlite4VdbeAddOp1(v, OP_Delete, baseCur+i); |
︙ | ︙ |
Changes to src/fts5.c.
︙ | ︙ | |||
256 257 258 259 260 261 262 263 264 265 266 267 268 269 | KVCursor *pCsr; /* Cursor used to retrive values */ Mem *aMem; /* Array of column values */ int bMemValid; /* True if contents of aMem[] are valid */ Fts5Size *pSz; /* Local size data */ Fts5Size *pGlobal; /* Global size data */ i64 nGlobal; /* Total number of rows in table */ int *anRow; Fts5MatchIter *pIter; /* Used by mi_match_detail() */ }; /* ** A deserialized 'size record' (see above). | > > > > > | 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 | KVCursor *pCsr; /* Cursor used to retrive values */ Mem *aMem; /* Array of column values */ int bMemValid; /* True if contents of aMem[] are valid */ Fts5Size *pSz; /* Local size data */ Fts5Size *pGlobal; /* Global size data */ i64 nGlobal; /* Total number of rows in table */ /* Arrays used by sqlite4_mi_row_count(). */ int *anRowCS; int *anRowC; int *anRowS; int *anRow; Fts5MatchIter *pIter; /* Used by mi_match_detail() */ }; /* ** A deserialized 'size record' (see above). |
︙ | ︙ | |||
592 593 594 595 596 597 598 599 600 601 602 603 604 605 | pToken = &p->pStr->aToken[p->pStr->nToken]; zSpace = &pParse->aSpace[pParse->iSpace]; nUsed = putVarint32((u8 *)zSpace, pParse->iRoot); zSpace[nUsed++] = 0x24; pToken->bPrefix = 0; pToken->z = &zSpace[nUsed]; pToken->n = n; memcpy(pToken->z, z, n); pToken->z[n] = '\0'; nUsed += (n+1); pToken->aPrefix = (u8 *)zSpace; | > | 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 | pToken = &p->pStr->aToken[p->pStr->nToken]; zSpace = &pParse->aSpace[pParse->iSpace]; nUsed = putVarint32((u8 *)zSpace, pParse->iRoot); zSpace[nUsed++] = 0x24; pToken->bPrefix = 0; pToken->pPrefix = 0; pToken->z = &zSpace[nUsed]; pToken->n = n; memcpy(pToken->z, z, n); pToken->z[n] = '\0'; nUsed += (n+1); pToken->aPrefix = (u8 *)zSpace; |
︙ | ︙ | |||
1266 1267 1268 1269 1270 1271 1272 | int nToken, int iSrc, int nSrc ){ TokenizeCtx *p = (TokenizeCtx *)pCtx; sqlite4 *db = p->db; TokenizeTerm *pTerm = 0; | < | 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 | int nToken, int iSrc, int nSrc ){ TokenizeCtx *p = (TokenizeCtx *)pCtx; sqlite4 *db = p->db; TokenizeTerm *pTerm = 0; /* TODO: Error here if iStream is out of range */ if( nToken>p->nMax ) p->nMax = nToken; if( iStream>=p->nStream ){ int nOld = p->nStream; |
︙ | ︙ | |||
1290 1291 1292 1293 1294 1295 1296 | pTerm = (TokenizeTerm *)sqlite4HashFind(&p->hash, zToken, nToken); if( pTerm==0 ){ /* Size the initial allocation so that it fits in the lookaside buffer */ int nAlloc = sizeof(TokenizeTerm) + nToken + 32; pTerm = sqlite4DbMallocZero(p->db, nAlloc); if( pTerm ){ | < < < < < | | | > < < | < | 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 | pTerm = (TokenizeTerm *)sqlite4HashFind(&p->hash, zToken, nToken); if( pTerm==0 ){ /* Size the initial allocation so that it fits in the lookaside buffer */ int nAlloc = sizeof(TokenizeTerm) + nToken + 32; pTerm = sqlite4DbMallocZero(p->db, nAlloc); if( pTerm ){ pTerm->nAlloc = sqlite4DbMallocSize(p->db, pTerm); pTerm->nToken = nToken; memcpy(&pTerm[1], zToken, nToken); } if( pTerm==0 ) goto tokenize_cb_out; }else{ sqlite4HashInsert(&p->hash, zToken, nToken, 0); } if( iStream!=pTerm->iStream ){ pTerm = fts5TokenizeAppendInt(p, pTerm, (iStream << 2) | 0x00000003); if( !pTerm ) goto tokenize_cb_out; pTerm->iStream = iStream; } if( pTerm && p->iCol!=pTerm->iCol ){ pTerm = fts5TokenizeAppendInt(p, pTerm, (p->iCol << 2) | 0x00000001); if( !pTerm ) goto tokenize_cb_out; pTerm->iCol = p->iCol; pTerm->iOff = 0; } pTerm = fts5TokenizeAppendInt(p, pTerm, (iOff-pTerm->iOff) << 1); if( !pTerm ) goto tokenize_cb_out; pTerm->iOff = iOff; tokenize_cb_out: sqlite4HashInsert(&p->hash, (char *)&pTerm[1], nToken, pTerm); if( !pTerm ){ p->rc = SQLITE4_NOMEM; return 1; } return 0; } |
︙ | ︙ | |||
1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 | /* ** Update an fts index. */ int sqlite4Fts5Update( sqlite4 *db, /* Database handle */ Fts5Info *pInfo, /* Description of fts index to update */ Mem *pKey, /* Primary key blob */ Mem *aArg, /* Array of arguments (see above) */ int bDel, /* True for a delete, false for insert */ char **pzErr /* OUT: Error message */ ){ int i; int rc = SQLITE4_OK; KVStore *pStore; TokenizeCtx sCtx; int nTnum = 0; u32 dummy = 0; u8 *aSpace = 0; int nSpace = 0; const u8 *pPK; int nPK; HashElem *pElem; pStore = db->aDb[pInfo->iDb].pKV; memset(&sCtx, 0, sizeof(sCtx)); sCtx.db = db; sCtx.nCol = pInfo->nCol; sqlite4HashInit(db->pEnv, &sCtx.hash, 1); | > > | 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 | /* ** Update an fts index. */ int sqlite4Fts5Update( sqlite4 *db, /* Database handle */ Fts5Info *pInfo, /* Description of fts index to update */ int iRoot, Mem *pKey, /* Primary key blob */ Mem *aArg, /* Array of arguments (see above) */ int bDel, /* True for a delete, false for insert */ char **pzErr /* OUT: Error message */ ){ int i; int rc = SQLITE4_OK; KVStore *pStore; TokenizeCtx sCtx; int nTnum = 0; u32 dummy = 0; u8 *aSpace = 0; int nSpace = 0; const u8 *pPK; int nPK; HashElem *pElem; if( iRoot==0 ) iRoot = pInfo->iRoot; pStore = db->aDb[pInfo->iDb].pKV; memset(&sCtx, 0, sizeof(sCtx)); sCtx.db = db; sCtx.nCol = pInfo->nCol; sqlite4HashInit(db->pEnv, &sCtx.hash, 1); |
︙ | ︙ | |||
1538 1539 1540 1541 1542 1543 1544 | ** * space for the size record and key for this document, and ** * space for the updated global size record for the document set. ** ** To make it easier, the below allocates enough space to simultaneously ** store the largest index record key and the largest possible global ** size record. */ | | | | 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 | ** * space for the size record and key for this document, and ** * space for the updated global size record for the document set. ** ** To make it easier, the below allocates enough space to simultaneously ** store the largest index record key and the largest possible global ** size record. */ nSpace = (sqlite4VarintLen(iRoot) + 2 + sCtx.nMax + nPK) + (9 * (2 + pInfo->nCol * sCtx.nStream)); aSpace = sqlite4DbMallocRaw(db, nSpace); if( aSpace==0 ) rc = SQLITE4_NOMEM; for(pElem=sqliteHashFirst(&sCtx.hash); pElem; pElem=sqliteHashNext(pElem)){ TokenizeTerm *pTerm = (TokenizeTerm *)sqliteHashData(pElem); if( rc==SQLITE4_OK ){ int nToken = sqliteHashKeysize(pElem); char *zToken = (char *)sqliteHashKey(pElem); u8 *aKey = aSpace; int nKey; nKey = putVarint32(aKey, iRoot); aKey[nKey++] = 0x24; memcpy(&aKey[nKey], zToken, nToken); nKey += nToken; aKey[nKey++] = 0x00; memcpy(&aKey[nKey], pPK, nPK); nKey += nPK; |
︙ | ︙ | |||
1577 1578 1579 1580 1581 1582 1583 | } /* Write the size record into the db */ if( rc==SQLITE4_OK ){ u8 *aKey = aSpace; int nKey; | | | 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 | } /* Write the size record into the db */ if( rc==SQLITE4_OK ){ u8 *aKey = aSpace; int nKey; nKey = putVarint32(aKey, iRoot); aKey[nKey++] = 0x00; memcpy(&aKey[nKey], pPK, nPK); nKey += nPK; if( bDel==0 ){ Fts5Size sSz; sSz.nCol = pInfo->nCol; |
︙ | ︙ | |||
1600 1601 1602 1603 1604 1605 1606 | /* Update the global record */ if( rc==SQLITE4_OK ){ Fts5Size *pSz; /* Deserialized global size record */ i64 nRow; /* Number of rows in indexed table */ u8 *aKey = aSpace; /* Space to format the global record key */ int nKey; /* Size of global record key in bytes */ | | | 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 | /* Update the global record */ if( rc==SQLITE4_OK ){ Fts5Size *pSz; /* Deserialized global size record */ i64 nRow; /* Number of rows in indexed table */ u8 *aKey = aSpace; /* Space to format the global record key */ int nKey; /* Size of global record key in bytes */ nKey = putVarint32(aKey, iRoot); aKey[nKey++] = 0x00; rc = fts5LoadSizeRecord(db, aKey, nKey, sCtx.nStream, pInfo, &nRow, &pSz); assert( rc!=SQLITE4_OK || pSz->nStream>=sCtx.nStream ); if( rc==SQLITE4_OK ){ int iCol; for(iCol=0; iCol<pSz->nCol; iCol++){ |
︙ | ︙ | |||
1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 | return pInfo; } void sqlite4Fts5CodeUpdate( Parse *pParse, Index *pIdx, int iRegPk, int iRegData, int bDel ){ Vdbe *v; Fts5Info *pInfo; /* p4 argument for FtsUpdate opcode */ if( 0==(pInfo = fts5InfoCreate(pParse, pIdx, 0)) ) return; v = sqlite4GetVdbe(pParse); | > | | 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 | return pInfo; } void sqlite4Fts5CodeUpdate( Parse *pParse, Index *pIdx, int iRegRoot, int iRegPk, int iRegData, int bDel ){ Vdbe *v; Fts5Info *pInfo; /* p4 argument for FtsUpdate opcode */ if( 0==(pInfo = fts5InfoCreate(pParse, pIdx, 0)) ) return; v = sqlite4GetVdbe(pParse); sqlite4VdbeAddOp3(v, OP_FtsUpdate, iRegPk, iRegRoot, iRegData); sqlite4VdbeChangeP4(v, -1, (const char *)pInfo, P4_FTS5INFO); sqlite4VdbeChangeP5(v, (u8)bDel); } void sqlite4Fts5CodeQuery( Parse *pParse, Index *pIdx, |
︙ | ︙ | |||
2931 2932 2933 2934 2935 2936 2937 | int *pnMatch, int *pnDoc, int *pnRelevant ){ return SQLITE4_OK; } | | > > > > > > > | > | > > > | | > > < > | > > > > | > > | | | > | | > > > | > > > | | | 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 | int *pnMatch, int *pnDoc, int *pnRelevant ){ return SQLITE4_OK; } static void fts5StrLoadRowcounts( Fts5Str *pStr, int nStream, int *anRow, int *anRowC, int *anRowS, int *pnRowCS ){ u32 mask = 0; int iPrevCol = -1; InstanceList sList; fts5InstanceListInit(pStr->aList, pStr->nList, &sList); while( 0==fts5InstanceListNext(&sList) ){ if( iPrevCol<0 ) (*pnRowCS)++; if( sList.iCol!=iPrevCol ){ mask = 0; anRowC[sList.iCol]++; } if( (mask & (1<<sList.iStream))==0 ){ anRow[sList.iCol * nStream + sList.iStream]++; mask |= (1<<sList.iStream); iPrevCol = sList.iCol; } } } static int fts5ExprLoadRowcounts( sqlite4 *db, Fts5Cursor *pCsr, int nStream, Fts5ExprNode *pNode, int *piStr ){ int rc = SQLITE4_OK; if( pNode ){ Fts5Info *pInfo = pCsr->pInfo; if( pNode->eType==TOKEN_PRIMITIVE ){ Fts5Phrase *pPhrase = pNode->pPhrase; int iStr = *piStr; rc = fts5ExprAdvance(db, pNode, 1); while( rc==SQLITE4_OK && pNode->aPk ){ int nIncr = pInfo->nCol * nStream; /* Values for each Fts5Str */ int i; for(i=0; i<pPhrase->nStr; i++){ int *anRow = &pCsr->anRow[(iStr+i) * pInfo->nCol * nStream]; int *anRowC = &pCsr->anRowC[(iStr+i) * pInfo->nCol]; int *anRowS = &pCsr->anRowS[(iStr+i) * nStream]; int *pnRowCS = &pCsr->anRowCS[iStr+i]; fts5StrLoadRowcounts( &pPhrase->aStr[i], nStream, anRow, anRowC, anRowS, pnRowCS ); } rc = fts5ExprAdvance(db, pNode, 0); } *piStr = iStr + pPhrase->nStr; } if( rc==SQLITE4_OK ){ rc = fts5ExprLoadRowcounts(db, pCsr, nStream, pNode->pLeft, piStr); } if( rc==SQLITE4_OK ){ rc = fts5ExprLoadRowcounts(db, pCsr, nStream, pNode->pRight, piStr); } } return rc; } static int fts5CsrLoadRowcounts(Fts5Cursor *pCsr){ int rc = SQLITE4_OK; if( pCsr->anRow==0 ){ int nStream = pCsr->pGlobal->nStream; sqlite4 *db = pCsr->db; Fts5Expr *pCopy; Fts5Expr *pExpr = pCsr->pExpr; Fts5Info *pInfo = pCsr->pInfo; int *anRow; int iPhrase = 0; pCsr->anRow = anRow = (int *)sqlite4DbMallocZero(db, sizeof(int) * ( pExpr->nPhrase * pInfo->nCol * pCsr->pGlobal->nStream + pExpr->nPhrase * pInfo->nCol + pExpr->nPhrase * pCsr->pGlobal->nStream + pExpr->nPhrase )); if( !anRow ) return SQLITE4_NOMEM; pCsr->anRowC = &anRow[pExpr->nPhrase*pInfo->nCol*pCsr->pGlobal->nStream]; pCsr->anRowS = &pCsr->anRowC[pExpr->nPhrase * pInfo->nCol]; pCsr->anRowCS = &pCsr->anRowS[pExpr->nPhrase * pCsr->pGlobal->nStream]; rc = fts5ParseExpression(db, pInfo->pTokenizer, pInfo->p, pInfo->iRoot, pInfo->azCol, pInfo->nCol, pCsr->zExpr, &pCopy, 0 ); if( rc==SQLITE4_OK ){ rc = fts5OpenExprCursors(db, pInfo, pCopy->pRoot); } if( rc==SQLITE4_OK ){ rc = fts5ExprLoadRowcounts(db, pCsr, nStream, pCopy->pRoot, &iPhrase); } fts5ExpressionFree(db, pCopy); } return rc; } |
︙ | ︙ | |||
3040 3041 3042 3043 3044 3045 3046 | int i; int nRow = 0; int nStream = pCsr->pGlobal->nStream; int nCol = pCsr->pInfo->nCol; int *aRow = &pCsr->anRow[iP * nStream * nCol]; if( iC<0 && iS<0 ){ | < | | | 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 | int i; int nRow = 0; int nStream = pCsr->pGlobal->nStream; int nCol = pCsr->pInfo->nCol; int *aRow = &pCsr->anRow[iP * nStream * nCol]; if( iC<0 && iS<0 ){ nRow = pCsr->anRowCS[iP]; }else if( iC<0 ){ for(i=0; i<nCol; i++) nRow += aRow[i*nStream + iS]; }else if( iS<0 ){ nRow = pCsr->anRowC[iP*nCol + iC]; }else if( iC<nCol && iS<nStream ){ nRow = aRow[iC * nStream + iS]; } *pn = nRow; } } |
︙ | ︙ |
Changes to src/fts5func.c.
︙ | ︙ | |||
70 71 72 73 74 75 76 77 78 79 80 81 82 83 | ** on the nature of both the documents and queries. The implementation ** below sets each parameter to the midpoint of the suggested range. */ static void fts5Rank(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){ const double b = 0.65; const double k1 = 1.6; int rc = SQLITE4_OK; /* Error code */ Fts5RankCtx *p; /* Structure to store reusable values */ int i; /* Used to iterate through phrases */ double rank = 0.0; /* UDF return value */ p = sqlite4_get_auxdata(pCtx, 0); if( p==0 ){ | > > > > > > < | 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | ** on the nature of both the documents and queries. The implementation ** below sets each parameter to the midpoint of the suggested range. */ static void fts5Rank(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){ const double b = 0.65; const double k1 = 1.6; sqlite4 *db = sqlite4_context_db_handle(pCtx); int rc = SQLITE4_OK; /* Error code */ Fts5RankCtx *p; /* Structure to store reusable values */ int i; /* Used to iterate through phrases */ double rank = 0.0; /* UDF return value */ int bExplain = 0; char *zExplain = 0; if( sqlite4_user_data(pCtx) ) bExplain = 1; p = sqlite4_get_auxdata(pCtx, 0); if( p==0 ){ int nPhrase; /* Number of phrases in query expression */ int nByte; /* Number of bytes of data to allocate */ sqlite4_mi_phrase_count(pCtx, &nPhrase); nByte = sizeof(Fts5RankCtx) + nPhrase * sizeof(double); p = (Fts5RankCtx *)sqlite4DbMallocZero(db, nByte); sqlite4_set_auxdata(pCtx, 0, (void *)p, fts5RankFreeCtx); |
︙ | ︙ | |||
102 103 104 105 106 107 108 109 110 111 112 113 114 115 | p->aIdf = (double *)&p[1]; /* Determine the IDF weight for each phrase in the query. */ rc = sqlite4_mi_total_rows(pCtx, &N); for(i=0; rc==SQLITE4_OK && i<nPhrase; i++){ rc = sqlite4_mi_row_count(pCtx, -1, -1, i, &ni); if( rc==SQLITE4_OK ){ p->aIdf[i] = log((0.5 + N - ni) / (0.5 + ni)); } } /* Determine the average document length */ if( rc==SQLITE4_OK ){ int nTotal; | > | 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | p->aIdf = (double *)&p[1]; /* Determine the IDF weight for each phrase in the query. */ rc = sqlite4_mi_total_rows(pCtx, &N); for(i=0; rc==SQLITE4_OK && i<nPhrase; i++){ rc = sqlite4_mi_row_count(pCtx, -1, -1, i, &ni); if( rc==SQLITE4_OK ){ assert( ni<=N ); p->aIdf[i] = log((0.5 + N - ni) / (0.5 + ni)); } } /* Determine the average document length */ if( rc==SQLITE4_OK ){ int nTotal; |
︙ | ︙ | |||
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 | ** in this row (within any column). And dl to the number of tokens in ** the current row (again, in any column). */ rc = sqlite4_mi_match_count(pCtx, -1, -1, i, &tf); if( rc==SQLITE4_OK ) rc = sqlite4_mi_size(pCtx, -1, -1, &dl); /* Calculate the normalized document length */ L = (double)dl / p->avgdl; /* Calculate the contribution to the rank made by this phrase. Then ** add it to variable rank. */ prank = (p->aIdf[i] * tf) / (k1 * ( (1.0 - b) + b * L) + tf); rank += prank; } if( rc==SQLITE4_OK ){ | > > > > > > > > > > > > > > > > > > > > | > | 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 | ** in this row (within any column). And dl to the number of tokens in ** the current row (again, in any column). */ rc = sqlite4_mi_match_count(pCtx, -1, -1, i, &tf); if( rc==SQLITE4_OK ) rc = sqlite4_mi_size(pCtx, -1, -1, &dl); /* Calculate the normalized document length */ L = (double)dl / p->avgdl; /* Calculate the contribution to the rank made by this phrase. Then ** add it to variable rank. */ prank = (p->aIdf[i] * tf) / (k1 * ( (1.0 - b) + b * L) + tf); rank += prank; if( bExplain ){ zExplain = sqlite4MAppendf( db, zExplain, "%s(idf=%.2f L=%.2f tf=%d) rank=%.2f", zExplain, p->aIdf[i], L, tf, prank ); if( (i+1)<p->nPhrase ){ zExplain = sqlite4MAppendf(db, zExplain, "%s<br>", zExplain); } } } if( rc==SQLITE4_OK ){ if( bExplain ){ if( p->nPhrase>1 ){ zExplain = sqlite4MAppendf( db, zExplain, "%s<br>total=%.2f", zExplain, rank ); } sqlite4_result_text(pCtx, zExplain, -1, SQLITE4_TRANSIENT); sqlite4DbFree(db, zExplain); }else{ sqlite4_result_double(pCtx, rank); } }else{ sqlite4_result_error_code(pCtx, rc); } } typedef struct Snippet Snippet; typedef struct SnippetText SnippetText; |
︙ | ︙ | |||
211 212 213 214 215 216 217 | ){ SnippetCtx *p = (SnippetCtx *)pCtx; if( iOff<p->iOff ){ return 0; }else if( iOff>=(p->iOff + p->nToken) ){ fts5SnippetAppend(p, &p->zText[p->iFrom], p->iTo - p->iFrom); | | | | 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 | ){ SnippetCtx *p = (SnippetCtx *)pCtx; if( iOff<p->iOff ){ return 0; }else if( iOff>=(p->iOff + p->nToken) ){ fts5SnippetAppend(p, &p->zText[p->iFrom], p->iTo - p->iFrom); fts5SnippetAppend(p, p->zEllipses, -1); p->iFrom = -1; return 1; }else{ int bHighlight; /* True to highlight term */ bHighlight = (p->mask & ((u64)1 << (iOff-p->iOff))) ? 1 : 0; if( p->iFrom==0 && p->iOff!=0 ){ p->iFrom = iSrc; if( p->pOut->nOut==0 ) fts5SnippetAppend(p, p->zEllipses, -1); } if( bHighlight ){ |
︙ | ︙ | |||
328 329 330 331 332 333 334 | int nScore = 0; int nPTok; int iPTok; if( iColumn>=0 && iColumn!=iCol ) continue; | | | > | | > > > > > | 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 | int nScore = 0; int nPTok; int iPTok; if( iColumn>=0 && iColumn!=iCol ) continue; allmask |= ((u64)1 << iPhrase); nShift = ((iPrevCol==iCol) ? (iOff-iPrev) : 100); for(iMask=0; iMask<nPhrase; iMask++){ if( nShift<64){ aMask[iMask] = aMask[iMask] >> nShift; }else{ aMask[iMask] = 0; } } sqlite4_mi_phrase_token_count(pCtx, iPhrase, &nPTok); for(iPTok=0; iPTok<nPTok; iPTok++){ aMask[iPhrase] = aMask[iPhrase] | ((u64)1 << (nToken-1+iPTok)); } for(iMask=0; iMask<nPhrase; iMask++){ int iBit; if( aMask[iMask] ){ nScore += ((((u64)1 << iMask) & mask) ? 100 : 1); }else{ miss |= ((u64)1 << iMask); } tmask = tmask | aMask[iMask]; /* TODO: This is the Hamming Weight. There are much more efficient ** ways to calculate it. */ for(iBit=0; iBit<nToken; iBit++){ if( tmask & ((u64)1 << iBit) ) nScore++; } } if( nScore>nBest ){ hlmask = tmask; missmask = miss; nBest = nScore; iBestOff = iOff; |
︙ | ︙ | |||
390 391 392 393 394 395 396 | int nLead = 0; int nShift = 0; u64 mask = pSnip->hlmask; int iOff = pSnip->iOff; if( mask==0 ) return; | | | | 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 | int nLead = 0; int nShift = 0; u64 mask = pSnip->hlmask; int iOff = pSnip->iOff; if( mask==0 ) return; assert( mask & ((u64)1 << (nToken-1)) ); for(i=0; (mask & ((u64)1 << i))==0; i++); nLead = i; nShift = (nLead/2); if( iOff+nShift > nSz-nToken ) nShift = (nSz-nToken) - iOff; if( iOff+nShift < 0 ) nShift = -1 * iOff; iOff += nShift; |
︙ | ︙ | |||
506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 | rc = sqlite4_create_tokenizer(db, "simple", (void *)pEnv, fts5SimpleCreate, fts5SimpleTokenize, fts5SimpleDestroy ); if( rc!=SQLITE4_OK ) return rc; rc = sqlite4_create_mi_function(db, "rank", 0, SQLITE4_UTF8, 0, fts5Rank, 0); if( rc!=SQLITE4_OK ) return rc; rc = sqlite4_create_mi_function( db, "snippet", -1, SQLITE4_UTF8, 0, fts5Snippet, 0 ); return rc; } | > > > > | 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 | rc = sqlite4_create_tokenizer(db, "simple", (void *)pEnv, fts5SimpleCreate, fts5SimpleTokenize, fts5SimpleDestroy ); if( rc!=SQLITE4_OK ) return rc; rc = sqlite4_create_mi_function(db, "rank", 0, SQLITE4_UTF8, 0, fts5Rank, 0); if( rc!=SQLITE4_OK ) return rc; rc = sqlite4_create_mi_function( db, "erank", 0, SQLITE4_UTF8, (void *)1, fts5Rank, 0 ); if( rc!=SQLITE4_OK ) return rc; rc = sqlite4_create_mi_function( db, "snippet", -1, SQLITE4_UTF8, 0, fts5Snippet, 0 ); return rc; } |
Changes to src/insert.c.
︙ | ︙ | |||
1418 1419 1420 1421 1422 1423 1424 | /* Write the entry to each index. */ for(i=0, pIdx=pTab->pIndex; pIdx; i++, pIdx=pIdx->pNext){ assert( pIdx->eIndexType!=SQLITE4_INDEX_PRIMARYKEY || aRegIdx[i] ); if( pIdx->eIndexType==SQLITE4_INDEX_FTS5 ){ int iPK; sqlite4FindPrimaryKey(pTab, &iPK); | | | 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 | /* Write the entry to each index. */ for(i=0, pIdx=pTab->pIndex; pIdx; i++, pIdx=pIdx->pNext){ assert( pIdx->eIndexType!=SQLITE4_INDEX_PRIMARYKEY || aRegIdx[i] ); if( pIdx->eIndexType==SQLITE4_INDEX_FTS5 ){ int iPK; sqlite4FindPrimaryKey(pTab, &iPK); sqlite4Fts5CodeUpdate(pParse, pIdx, 0, aRegIdx[iPK], regContent, 0); } else if( aRegIdx[i] ){ int regData = 0; int flags = 0; if( pIdx->eIndexType==SQLITE4_INDEX_PRIMARYKEY ){ regData = regRec; flags = pik_flags; |
︙ | ︙ |
Changes to src/rowset.c.
︙ | ︙ | |||
154 155 156 157 158 159 160 | memset(p, 0, sizeof(RowSet)); p->isSorted = 1; } static u8 *rowsetAllocateChunk(RowSet *p, int nByte){ int rowChunkSize = ROUND8(sizeof(RowSetChunk)); | | | > > > | | 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | memset(p, 0, sizeof(RowSet)); p->isSorted = 1; } static u8 *rowsetAllocateChunk(RowSet *p, int nByte){ int rowChunkSize = ROUND8(sizeof(RowSetChunk)); RowSetChunk *pNew; /* New RowSetChunk */ int nAlloc; /* Bytes to request from malloc() */ nAlloc = rowChunkSize + nByte; pNew = (RowSetChunk *)sqlite4DbMallocRaw(p->db, nAlloc); if( !pNew ) return 0; pNew->pNextChunk = p->pChunk; p->pChunk = pNew; return (u8 *)(&pNew[1]); } static int rowsetEntryKeyCmp(RowSetEntry *pLeft, const u8 *aKey, int nKey){ int nCmp = SQLITE4_MIN(pLeft->nKey, nKey); int res; res = memcmp(pLeft->aKey, aKey, nCmp); return (res ? res : (pLeft->nKey - nKey)); |
︙ | ︙ |
Changes to src/sqliteInt.h.
︙ | ︙ | |||
3265 3266 3267 3268 3269 3270 3271 | void sqlite4ShutdownFts5(sqlite4 *db); void sqlite4CreateUsingIndex(Parse*, CreateIndex*, ExprList*, Token*, Token*); int sqlite4Fts5IndexSz(void); void sqlite4Fts5IndexInit(Parse *, Index *, ExprList *); void sqlite4Fts5IndexFree(sqlite4 *, Index *); | | | | 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 | void sqlite4ShutdownFts5(sqlite4 *db); void sqlite4CreateUsingIndex(Parse*, CreateIndex*, ExprList*, Token*, Token*); int sqlite4Fts5IndexSz(void); void sqlite4Fts5IndexInit(Parse *, Index *, ExprList *); void sqlite4Fts5IndexFree(sqlite4 *, Index *); int sqlite4Fts5Update(sqlite4 *, Fts5Info *, int, Mem *, Mem *, int, char **); void sqlite4Fts5FreeInfo(sqlite4 *db, Fts5Info *); void sqlite4Fts5CodeUpdate(Parse *, Index *pIdx, int, int, int, int); void sqlite4Fts5CodeCksum(Parse *, Index *, int, int, int); void sqlite4Fts5CodeQuery(Parse *, Index *, int, int, int); int sqlite4Fts5Pk(Fts5Cursor *, int, KVByteArray **, KVSize *); int sqlite4Fts5Next(Fts5Cursor *pCsr); int sqlite4Fts5EntryCksum(sqlite4 *, Fts5Info *, Mem *, Mem *, i64 *); |
︙ | ︙ |
Changes to src/vdbe.c.
︙ | ︙ | |||
4847 4848 4849 4850 4851 4852 4853 | sqlite4DebugPrintf("SQL-trace: %s\n", zTrace); } #endif /* SQLITE4_DEBUG */ break; } #endif | | > > > > > > > > > > > > | | 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 | sqlite4DebugPrintf("SQL-trace: %s\n", zTrace); } #endif /* SQLITE4_DEBUG */ break; } #endif /* Opcode: FtsUpdate P1 P2 P3 P4 P5 ** ** This opcode is used to write to an FTS index. P4 points to an Fts5Info ** object describing the index. ** ** If argument P5 is non-zero, then entries are removed from the FTS index. ** If it is zero, then entries are inserted. In other words, when a row ** is deleted from a table with an FTS index, this opcode is invoked with ** P5==1. When a row is inserted, it is invoked with P5==0. If an existing ** row is updated, this opcode is invoked twice - once with P5==1 and then ** again with P5==0. ** ** Register P1 contains the PK (a blob in key format) of the affected row. ** P3 is the first in an array of N registers, where N is the number of ** columns in the indexed table. Each register contains the value for the ** corresponding table column. ** ** If P2 is non-zero, then it is a register containing the root page number ** of the fts index to update. If it is zero, then the root page of the ** index is available as part of the Fts5Info structure. */ case OP_FtsUpdate: { Fts5Info *pInfo; /* Description of fts5 index to update */ Mem *pKey; /* Primary key of indexed row */ Mem *aArg; /* Pointer to array of N arguments */ Mem *pRoot; /* Root page number */ int iRoot; assert( pOp->p4type==P4_FTS5INFO ); pInfo = pOp->p4.pFtsInfo; aArg = &aMem[pOp->p3]; pKey = &aMem[pOp->p1]; if( pOp->p2 ){ iRoot = aMem[pOp->p2].u.i; }else{ iRoot = 0; } rc = sqlite4Fts5Update(db, pInfo, iRoot, pKey, aArg, pOp->p5, &p->zErrMsg); break; } /* ** Opcode: FtsCksum P1 * P3 P4 P5 ** ** This opcode is used by the integrity-check procedure that verifies that |
︙ | ︙ |
Changes to test/fts5create.test.
︙ | ︙ | |||
70 71 72 73 74 75 76 | CREATE INDEX ft ON t2 USING fts5(tukenizer=simple); } {1 {unrecognized argument: "tukenizer"}} do_catchsql_test 2.3 { CREATE INDEX ft ON t2 USING fts5("a b c"); } {1 {unrecognized argument: "a b c"}} | < > > > > > > > > > > > > > > > > | 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 | CREATE INDEX ft ON t2 USING fts5(tukenizer=simple); } {1 {unrecognized argument: "tukenizer"}} do_catchsql_test 2.3 { CREATE INDEX ft ON t2 USING fts5("a b c"); } {1 {unrecognized argument: "a b c"}} do_catchsql_test 2.4 { CREATE INDEX ft ON t2 USING fts5(tokenizer="nosuch"); } {1 {no such tokenizer: "nosuch"}} #------------------------------------------------------------------------- # reset_db do_execsql_test 3.1 { CREATE TABLE t1(a, b, c, PRIMARY KEY(a)); INSERT INTO t1 VALUES(1, 'a b c d', 'e f g h'); INSERT INTO t1 VALUES(2, 'e f g h', 'a b c d'); } do_execsql_test 3.2 { CREATE INDEX ft ON t1 USING fts5(); PRAGMA fts_check(ft); } {ok} finish_test |
Changes to www/lsmusr.wiki.
︙ | ︙ | |||
146 147 148 149 150 151 152 | based system. Additionally, avoiding random writes in favour of largely contiguous updates (as LSM does) can significantly reduce the wear on SSD or flash memory devices. <p>Although it has quite different features to LSM in other respects, LevelDB makes similar performance tradeoffs. | | < < | 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 | based system. Additionally, avoiding random writes in favour of largely contiguous updates (as LSM does) can significantly reduce the wear on SSD or flash memory devices. <p>Although it has quite different features to LSM in other respects, LevelDB makes similar performance tradeoffs. <p>Benchmark test results for LSM are <a href=lsmperf.wiki>available here</a>. <h1 id=using_lsm_in_applications>2. Using LSM in Applications </h1> <p>LSM is not currently built or distributed independently. Instead, it is part of the SQLite4 library. To use LSM in an application, the application links against libsqlite4 and includes the header file "lsm.h" in any files that access the LSM API. |
︙ | ︙ |