/ Check-in [dc734c5b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve performance of editdist3() by keeping the costs in sorted order. Also add a new regression test to editdist3().
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256:dc734c5b61464dfd6bfa7963f2ecce32e405a0c2ba1ef6f453ec9389da080256
User & Date: drh 2018-02-15 03:56:33
Context
2018-02-15
21:00
Do not allow parameters or schema references inside of WITH clause of triggers and views. This fixes a bug discovered by OSSFuzz and present since common-table-expressions were first added in 2014-02-03. check-in: b918d4b4 user: drh tags: trunk
03:56
Improve performance of editdist3() by keeping the costs in sorted order. Also add a new regression test to editdist3(). check-in: dc734c5b user: drh tags: trunk
03:05
Reduce the number of calls to strncmp() required to run editDist3Core(). check-in: afd6fbc0 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/misc/spellfix.c.

   653    653     memset(p, 0, sizeof(*p));
   654    654   }
   655    655   static void editDist3ConfigDelete(void *pIn){
   656    656     EditDist3Config *p = (EditDist3Config*)pIn;
   657    657     editDist3ConfigClear(p);
   658    658     sqlite3_free(p);
   659    659   }
          660  +
          661  +/* Compare the FROM values of two EditDist3Cost objects, for sorting.
          662  +** Return negative, zero, or positive if the A is less than, equal to,
          663  +** or greater than B.
          664  +*/
          665  +static int editDist3CostCompare(EditDist3Cost *pA, EditDist3Cost *pB){
          666  +  int n = pA->nFrom;
          667  +  int rc;
          668  +  if( n>pB->nFrom ) n = pB->nFrom;
          669  +  rc = strncmp(pA->a, pB->a, n);
          670  +  if( rc==0 ) rc = pA->nFrom - pB->nFrom;
          671  +  return rc;
          672  +}
          673  +
          674  +/*
          675  +** Merge together two sorted lists of EditDist3Cost objects, in order
          676  +** of increasing FROM.
          677  +*/
          678  +static EditDist3Cost *editDist3CostMerge(
          679  +  EditDist3Cost *pA,
          680  +  EditDist3Cost *pB
          681  +){
          682  +  EditDist3Cost *pHead = 0;
          683  +  EditDist3Cost **ppTail = &pHead;
          684  +  EditDist3Cost *p;
          685  +  while( pA && pB ){
          686  +    if( editDist3CostCompare(pA,pB)<=0 ){
          687  +      p = pA;
          688  +      pA = pA->pNext;
          689  +    }else{
          690  +      p = pB;
          691  +      pB = pB->pNext;
          692  +    }
          693  +    *ppTail = p;
          694  +    ppTail =  &p->pNext;
          695  +  }
          696  +  if( pA ){
          697  +    *ppTail = pA;
          698  +  }else{
          699  +    *ppTail = pB;
          700  +  }
          701  +  return pHead;
          702  +}
          703  +
          704  +/*
          705  +** Sort a list of EditDist3Cost objects into order of increasing FROM
          706  +*/
          707  +static EditDist3Cost *editDist3CostSort(EditDist3Cost *pList){
          708  +  EditDist3Cost *ap[60], *p;
          709  +  int i;
          710  +  int mx = 0;
          711  +  ap[0] = 0;
          712  +  ap[1] = 0;
          713  +  while( pList ){
          714  +    p = pList;
          715  +    pList = p->pNext;
          716  +    p->pNext = 0;
          717  +    for(i=0; ap[i]; i++){
          718  +      p = editDist3CostMerge(ap[i],p);
          719  +      ap[i] = 0;
          720  +    }
          721  +    ap[i] = p;
          722  +    if( i>mx ){
          723  +      mx = i;
          724  +      ap[i+1] = 0;
          725  +    }
          726  +  }
          727  +  p = 0;
          728  +  for(i=0; i<=mx; i++){
          729  +    if( ap[i] ) p = editDist3CostMerge(p,ap[i]);
          730  +  }
          731  +  return p;
          732  +}
   660    733   
   661    734   /*
   662    735   ** Load all edit-distance weights from a table.
   663    736   */
   664    737   static int editDist3ConfigLoad(
   665    738     EditDist3Config *p,      /* The edit distance configuration to load */
   666    739     sqlite3 *db,            /* Load from this database */
................................................................................
   725    798         memcpy(pCost->a + nFrom, zTo, nTo);
   726    799         pCost->pNext = pLang->pCost;
   727    800         pLang->pCost = pCost; 
   728    801       }
   729    802     }
   730    803     rc2 = sqlite3_finalize(pStmt);
   731    804     if( rc==SQLITE_OK ) rc = rc2;
          805  +  if( rc==SQLITE_OK ){
          806  +    int iLang;
          807  +    for(iLang=0; iLang<p->nLang; iLang++){
          808  +      p->a[iLang].pCost = editDist3CostSort(p->a[iLang].pCost);
          809  +    }
          810  +  }
   732    811     return rc;
   733    812   }
   734    813   
   735    814   /*
   736    815   ** Return the length (in bytes) of a utf-8 character.  Or return a maximum
   737    816   ** of N.
   738    817   */
................................................................................
   939   1018     memset(a2, 0, sizeof(a2[0])*n2);
   940   1019   
   941   1020     /* Fill in the a1[] matrix for all characters of the TO string */
   942   1021     for(i2=0; i2<n2; i2++){
   943   1022       a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
   944   1023       for(p=pLang->pCost; p; p=p->pNext){
   945   1024         EditDist3Cost **apNew;
   946         -      if( p->nFrom>0 ) continue;
         1025  +      if( p->nFrom>0 ) break;
   947   1026         if( i2+p->nTo>n2 ) continue;
         1027  +      if( p->a[0]>z2[i2] ) break;
   948   1028         if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
   949   1029         a2[i2].nIns++;
   950   1030         apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
   951   1031         if( apNew==0 ){
   952   1032           res = -1;  /* Out of memory */
   953   1033           goto editDist3Abort;
   954   1034         }

Changes to test/spellfix4.test.

    93     93     INSERT INTO cost1 VALUES
    94     94       (0, '', '?',  97),
    95     95       (0, '?', '',  98),
    96     96       (0, '?', '?', 99),
    97     97       (0, 'a', 'e', 50),
    98     98       (0, 'a', 'i', 70),
    99     99       (0, 'a', 'o', 75),
   100         -    (0, 'a', 'u', 85),
          100  +    (0, 'a', 'u', 81),
   101    101       (0, 'e', 'a', 50),
   102         -    (0, 'e', 'i', 50),
   103         -    (0, 'e', 'o', 75),
   104         -    (0, 'e', 'u', 85),
          102  +    (0, 'e', 'i', 52),
          103  +    (0, 'e', 'o', 72),
          104  +    (0, 'e', 'u', 82),
   105    105       (0, 'i', 'a', 70),
   106         -    (0, 'i', 'e', 50),
          106  +    (0, 'i', 'e', 52),
   107    107       (0, 'i', 'o', 75),
   108         -    (0, 'i', 'u', 85),
          108  +    (0, 'i', 'u', 83),
   109    109       (0, 'o', 'a', 75),
   110         -    (0, 'o', 'e', 75),
          110  +    (0, 'o', 'e', 72),
   111    111       (0, 'o', 'i', 75),
   112    112       (0, 'o', 'u', 40),
   113         -    (0, 'u', 'a', 85),
   114         -    (0, 'u', 'e', 85),
   115         -    (0, 'u', 'i', 85),
          113  +    (0, 'u', 'a', 81),
          114  +    (0, 'u', 'e', 82),
          115  +    (0, 'u', 'i', 83),
   116    116       (0, 'u', 'o', 40),
   117    117       (0, 'm', 'n', 45),
   118    118       (0, 'n', 'm', 45)
   119    119     ;
   120    120     CREATE TABLE words(x TEXT);
   121    121     INSERT INTO words VALUES
   122    122      ('abraham'),
................................................................................
   336    336   } {{}}
   337    337   do_execsql_test 310 {
   338    338     SELECT editdist3(a.x,b.x), a.x, b.x
   339    339       FROM words a, words b
   340    340      WHERE a.x<b.x
   341    341      ORDER BY 1, 2
   342    342      LIMIT 20
   343         -} {139 bucket pocket 149 manual mental 150 meter motor 169 crack trick 173 sinatra sonata 174 edition emotion 174 major motor 174 risk rose 174 state stone 194 deal detail 196 alert talent 196 analog catalog 196 deal legal 196 ford forum 196 risk trick 196 stone strong 197 china tina 197 congo logo 197 diana tina 197 florida gloria}
   344         -
          343  +} {139 bucket pocket 144 meter motor 149 manual mental 169 crack trick 173 sinatra sonata 174 edition emotion 174 major motor 174 risk rose 174 state stone 194 deal detail 196 alert talent 196 analog catalog 196 deal legal 196 ford forum 196 risk trick 196 stone strong 197 china tina 197 congo logo 197 diana tina 197 florida gloria}
          344  +do_execsql_test 320 {
          345  +  SELECT md5sum(ed||'/'||sx||'/'||sy||',') FROM (
          346  +      SELECT editdist3(a.x,b.x) AS ed, a.x AS sx, b.x AS sy
          347  +        FROM words a, words b
          348  +       WHERE a.x<b.x
          349  +       ORDER BY 1, 2
          350  +  )
          351  +} {69d0a31872203a775e19325ea98cd053}
   345    352   
   346    353   finish_test