Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add syntax to fts5 used to specify that a phrase or NEAR group should match a subset of columns. For example "[col1 col2 ...] : <phrase>". |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
0fc0ea20920615f3e48ea2dbe2b7dcd9 |
User & Date: | dan 2015-05-29 15:55:30.046 |
Context
2015-05-29
| ||
19:00 | Add extra tests and fixes for multi-column matches. (check-in: ae6794ffa2 user: dan tags: fts5) | |
15:55 | Add syntax to fts5 used to specify that a phrase or NEAR group should match a subset of columns. For example "[col1 col2 ...] : <phrase>". (check-in: 0fc0ea2092 user: dan tags: fts5) | |
2015-05-28
| ||
19:57 | Optimizations for fts5 queries that match against a specific column. (check-in: b29ac50af0 user: dan tags: fts5) | |
Changes
Changes to ext/fts5/fts5Int.h.
︙ | |||
507 508 509 510 511 512 513 514 515 516 517 518 519 520 | 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 | + | */ typedef struct Fts5Expr Fts5Expr; typedef struct Fts5ExprNode Fts5ExprNode; typedef struct Fts5Parse Fts5Parse; typedef struct Fts5Token Fts5Token; typedef struct Fts5ExprPhrase Fts5ExprPhrase; typedef struct Fts5ExprNearset Fts5ExprNearset; typedef struct Fts5ExprColset Fts5ExprColset; struct Fts5Token { const char *p; /* Token text (not NULL terminated) */ int n; /* Size of buffer p in bytes */ }; /* Parse a MATCH expression. */ |
︙ | |||
573 574 575 576 577 578 579 580 581 582 583 584 585 | 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 | + + + + + + - + | ); Fts5ExprNearset *sqlite3Fts5ParseNearset( Fts5Parse*, Fts5ExprNearset*, Fts5ExprPhrase* ); Fts5ExprColset *sqlite3Fts5ParseColset( Fts5Parse*, Fts5ExprColset*, Fts5Token * ); void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); |
︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | |||
75 76 77 78 79 80 81 82 83 84 85 86 87 | 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 | + + + + + + + + + + - + | struct Fts5ExprPhrase { Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */ Fts5Buffer poslist; /* Current position list */ int nTerm; /* Number of entries in aTerm[] */ Fts5ExprTerm aTerm[0]; /* Terms that make up this phrase */ }; /* ** If a NEAR() clump may only match a specific set of columns, then ** Fts5ExprNearset.pColset points to an object of the following type. ** Each entry in the aiCol[] array is a column that may be matched. */ struct Fts5ExprColset { int nCol; int aiCol[1]; }; /* ** One or more phrases that must appear within a certain token distance of ** each other within each matching document. */ struct Fts5ExprNearset { int nNear; /* NEAR parameter */ |
︙ | |||
132 133 134 135 136 137 138 139 140 141 142 143 144 145 | 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | + + | while( fts5ExprIsspace(*z) ) z++; pToken->p = z; pToken->n = 1; switch( *z ){ case '(': tok = FTS5_LP; break; case ')': tok = FTS5_RP; break; case '[': tok = FTS5_LSP; break; case ']': tok = FTS5_RSP; break; case ':': tok = FTS5_COLON; break; case ',': tok = FTS5_COMMA; break; case '+': tok = FTS5_PLUS; break; case '*': tok = FTS5_STAR; break; case '\0': tok = FTS5_EOF; break; case '"': { |
︙ | |||
271 272 273 274 275 276 277 | 283 284 285 286 287 288 289 290 291 292 293 294 295 296 | - | pNew->nPhrase = 1; pNew->apExprPhrase = apPhrase; pNew->apExprPhrase[0] = pCopy; pNode->eType = FTS5_STRING; pNode->pNear = pNear; |
︙ | |||
331 332 333 334 335 336 337 | 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 | - + + | ** ** SQLITE_OK is returned if no error occurs, or an SQLite error code ** otherwise. It is not considered an error if the current rowid is ** not a match. */ static int fts5ExprPhraseIsMatch( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
︙ | |||
660 661 662 663 664 665 666 | 672 673 674 675 676 677 678 679 680 681 682 683 684 685 | - | ** contain any entries for column iCol, return 0. */ static int fts5ExprExtractCol( const u8 **pa, /* IN/OUT: Pointer to poslist */ int n, /* IN: Size of poslist in bytes */ int iCol /* Column to extract from poslist */ ){ |
︙ | |||
712 713 714 715 716 717 718 | 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 | - + + + - - + + - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - + - + - + | Fts5Expr *pExpr, /* Expression that pNear is a part of */ Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ ){ Fts5ExprNearset *pNear = pNode->pNear; int rc = SQLITE_OK; while( 1 ){ |
︙ | |||
1148 1149 1150 1151 1152 1153 1154 | 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 | - | if( pNear==0 ){ int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*); pRet = sqlite3_malloc(nByte); if( pRet==0 ){ pParse->rc = SQLITE_NOMEM; }else{ memset(pRet, 0, nByte); |
︙ | |||
1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 | 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 | + | */ void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ if( pNear ){ int i; for(i=0; i<pNear->nPhrase; i++){ fts5ExprPhraseFree(pNear->apPhrase[i]); } sqlite3_free(pNear->pColset); sqlite3_free(pNear); } } void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ assert( pParse->pExpr==0 ); pParse->pExpr = p; |
︙ | |||
1309 1310 1311 1312 1313 1314 1315 | 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 | - + | pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p ); } } void sqlite3Fts5ParseSetDistance( Fts5Parse *pParse, |
︙ | |||
1331 1332 1333 1334 1335 1336 1337 | 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + + + + + + - - - + + + - - + + + + + + + + + + + + + + + + + + + + + + + + | } }else{ nNear = FTS5_DEFAULT_NEARDIST; } pNear->nNear = nNear; } /* ** The second argument passed to this function may be NULL, or it may be ** an existing Fts5ExprColset object. This function returns a pointer to ** a new colset object containing the contents of (p) with new value column ** number iCol appended. ** ** If an OOM error occurs, store an error code in pParse and return NULL. ** The old colset object (if any) is not freed in this case. */ static Fts5ExprColset *fts5ParseColset( Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ Fts5ExprColset *p, /* Existing colset object */ int iCol /* New column to add to colset object */ ){ int nCol = p ? p->nCol : 0; /* Num. 
columns already in colset object */ Fts5ExprColset *pNew; /* New colset object to return */ assert( pParse->rc==SQLITE_OK ); assert( iCol>=0 && iCol<pParse->pConfig->nCol ); pNew = sqlite3_realloc(p, sizeof(Fts5ExprColset) + sizeof(int)*nCol); if( pNew==0 ){ pParse->rc = SQLITE_NOMEM; }else{ int *aiCol = pNew->aiCol; int i, j; for(i=0; i<nCol; i++){ if( aiCol[i]==iCol ) return pNew; if( aiCol[i]>iCol ) break; } for(j=nCol; j>i; j--){ aiCol[j] = aiCol[j-1]; } aiCol[i] = iCol; pNew->nCol = nCol+1; #ifndef NDEBUG /* Check that the array is in order and contains no duplicate entries. */ for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] ); #endif } return pNew; } |
︙ | |||
1459 1460 1461 1462 1463 1464 1465 | 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 | - - + + + + + + + + + + + + | if( pExpr->eType==FTS5_STRING ){ Fts5ExprNearset *pNear = pExpr->pNear; int i; int iTerm; zRet = fts5PrintfAppend(zRet, "[%s ", zNearsetCmd); if( zRet==0 ) return 0; |
︙ | |||
1526 1527 1528 1529 1530 1531 1532 | 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 | - - + + + | static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ char *zRet = 0; if( pExpr->eType==FTS5_STRING ){ Fts5ExprNearset *pNear = pExpr->pNear; int i; int iTerm; |
︙ |
Changes to ext/fts5/fts5_index.c.
︙ | |||
4475 4476 4477 4478 4479 4480 4481 | 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 | - - | } /* ** Move to the next matching rowid. */ int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ assert( pIter->pIndex->rc==SQLITE_OK ); |
︙ |
Changes to ext/fts5/fts5parse.y.
︙ | |||
91 92 93 94 95 96 97 | 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | - - + + + + + + + + + + + + + + + + + + + | exprlist(A) ::= exprlist(X) cnearset(Y). { A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0); } cnearset(A) ::= nearset(X). { A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, X); } |
︙ |
Changes to ext/fts5/test/fts5ac.test.
︙ | |||
121 122 123 124 125 126 127 128 129 130 131 132 133 | 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 | + + + + + + + + + + + + + + + + + + + - + | 95 {h b n j t k i h o q u} {w n g i t o k c a m y p f l x c p} 96 {f c x p y r b m o l m o a} {p c a q s u n n x d c f a o} 97 {u h h k m n k} {u b v n u a o c} 98 {s p e t c z d f n w f} {l s f j b l c e s h} 99 {r c v w i v h a t a c v c r e} {h h u m g o f b a e o} } #------------------------------------------------------------------------- # Usage: # # poslist aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2... # # This command is used to test if a document (set of column values) matches # the logical equivalent of a single FTS5 NEAR() clump and, if so, return # the equivalent of an FTS5 position list. # # Parameter $aCol is passed a list of the column values for the document # to test. Parameters $phrase1 and so on are the phrases. # # The result is a list of phrase hits. Each phrase hit is formatted as # three integers separated by "." characters, in the following format: # # <phrase number> . <column number> . <token offset> # # Options: # # -near N (NEAR distance. Default 10) # -col C (List of column indexes to match against) # -pc VARNAME (variable in caller frame to use for phrase numbering) # proc poslist {aCol args} { set O(-near) 10 |
︙ | |||
157 158 159 160 161 162 163 | 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 | - - + | set A($j,$i) [list] } } set iCol -1 foreach col $aCol { incr iCol |
︙ | |||
357 358 359 360 361 362 363 | 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 | - - - - + + + + + + + + + + + + + + + + + + | } $res } #------------------------------------------------------------------------- # Queries on a specific column. # foreach {tn expr} { |
︙ |