Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Still more performance enhancements to the LIKE and GLOB operators. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
6c8924cacc2b875270770fed2cc3b188 |
User & Date: | drh 2014-09-25 11:08:57.081 |
Context
2014-09-25
| ||
12:31 | Simplification to the random rowid picking logic that begins running when the maximum possible rowid has already been used. (check-in: 1330c72e17 user: drh tags: trunk) | |
11:08 | Still more performance enhancements to the LIKE and GLOB operators. (check-in: 6c8924cacc user: drh tags: trunk) | |
03:51 | More performance optimization for the LIKE and GLOB operators. (check-in: 5ab1023d6c user: drh tags: trunk) | |
Changes
Changes to src/func.c.
︙ | ︙ | |||
581 582 583 584 585 586 587 | static const struct compareInfo likeInfoNorm = { '%', '_', 0, 1 }; /* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator ** is case sensitive causing 'a' LIKE 'A' to be false */ static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 }; /* ** Compare two UTF-8 strings for equality where the first string can | | | | | | > > > > > > > | < < | | | | > | > > > | > > | > > > > > > > > > > > | | | > | > | < > > | | < | < < | | | < | < < | < < | < > | | | 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 | static const struct compareInfo likeInfoNorm = { '%', '_', 0, 1 }; /* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator ** is case sensitive causing 'a' LIKE 'A' to be false */ static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 }; /* ** Compare two UTF-8 strings for equality where the first string can ** potentially be a "glob" or "like" expression. Return true (1) if they ** are the same and false (0) if they are different. ** ** Globbing rules: ** ** '*' Matches any sequence of zero or more characters. ** ** '?' Matches exactly one character. ** ** [...] Matches one character from the enclosed list of ** characters. ** ** [^...] Matches one character not in the enclosed list. ** ** With the [...] and [^...] matching, a ']' character can be included ** in the list by making it the first character after '[' or '^'. A ** range of characters can be specified using '-'. Example: ** "[a-z]" matches any single lower-case letter. To match a '-', make ** it the last character in the list. ** ** Like matching rules: ** ** '%' Matches any sequence of zero or more characters ** *** '_' Matches any one character ** ** Ec Where E is the "esc" character and c is any other ** character, including '%', '_', and esc, match exactly c. ** ** The comments through this routine usually assume glob matching. ** ** This routine is usually quick, but can be N**2 in the worst case. */ static int patternCompare( const u8 *zPattern, /* The glob pattern */ const u8 *zString, /* The string to compare against the glob */ const struct compareInfo *pInfo, /* Information about how to do the compare */ u32 esc /* The escape character */ ){ u32 c, c2; /* Next pattern and input string chars */ u32 matchOne = pInfo->matchOne; /* "?" or "_" */ u32 matchAll = pInfo->matchAll; /* "*" or "%" */ u32 matchOther; /* "[" or the escape character */ u8 noCase = pInfo->noCase; /* True if uppercase==lowercase */ const u8 *zEscaped = 0; /* One past the last escaped input char */ /* The GLOB operator does not have an ESCAPE clause. And LIKE does not ** have the matchSet operator. So we either have to look for one or ** the other, never both. Hence the single variable matchOther is used ** to store the one we have to look for. */ matchOther = esc ? esc : pInfo->matchSet; while( (c = sqlite3Utf8Read(&zPattern))!=0 ){ if( c==matchAll ){ /* Match "*" */ /* Skip over multiple "*" characters in the pattern. If there ** are also "?" characters, skip those as well, but consume a ** single character of the input string for each "?" skipped */ while( (c=sqlite3Utf8Read(&zPattern)) == matchAll || c == matchOne ){ if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){ return 0; } } if( c==0 ){ return 1; /* "*" at the end of the pattern matches */ }else if( c==matchOther ){ if( esc ){ c = sqlite3Utf8Read(&zPattern); if( c==0 ) return 0; }else{ /* "[...]" immediately follows the "*". We have to do a slow ** recursive search in this case, but it is an unusual case. */ assert( matchOther<0x80 ); /* '[' is a single-byte character */ while( *zString && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){ SQLITE_SKIP_UTF8(zString); } return *zString!=0; } } /* At this point variable c contains the first character of the ** pattern string past the "*". Search in the input string for the ** first matching character and recursively contine the match from ** that point. ** ** For a case-insensitive search, set variable cx to be the same as ** c but in the other case and search the input string for either ** c or cx. */ if( c<=0x80 ){ u32 cx; if( noCase ){ cx = sqlite3Toupper(c); c = sqlite3Tolower(c); }else{ cx = c; } while( (c2 = *(zString++))!=0 ){ if( c2!=c && c2!=cx ) continue; if( patternCompare(zPattern,zString,pInfo,esc) ) return 1; } }else{ while( (c2 = sqlite3Utf8Read(&zString))!=0 ){ if( c2!=c ) continue; if( patternCompare(zPattern,zString,pInfo,esc) ) return 1; } } return 0; } if( c==matchOther ){ if( esc ){ c = sqlite3Utf8Read(&zPattern); if( c==0 ) return 0; zEscaped = zPattern; }else{ u32 prior_c = 0; int seen = 0; int invert = 0; c = sqlite3Utf8Read(&zString); if( c==0 ) return 0; c2 = sqlite3Utf8Read(&zPattern); if( c2=='^' ){ invert = 1; c2 = sqlite3Utf8Read(&zPattern); } |
︙ | ︙ | |||
716 717 718 719 720 721 722 | return 0; } continue; } } c2 = sqlite3Utf8Read(&zString); if( c==c2 ) continue; | | | < > > | | 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 | return 0; } continue; } } c2 = sqlite3Utf8Read(&zString); if( c==c2 ) continue; if( noCase && c<0x80 && c2<0x80 && sqlite3Tolower(c)==sqlite3Tolower(c2) ){ continue; } if( c==matchOne && zPattern!=zEscaped && c2!=0 ) continue; return 0; } return *zString==0; } /* ** The sqlite3_strglob() interface. */ |
︙ | ︙ |