/ Check-in [ad8df6c5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add a couple of comments to fts3_snippet.c.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts3-experimental
Files: files | file ages | folders
SHA1:ad8df6c5ee0055cf1161a3b700d69dbe07d969bc
User & Date: dan 2010-11-26 15:13:32
Context
2010-11-26
15:17
Merge fts3 matchinfo() changes with trunk. check-in: 7d660b91 user: dan tags: trunk
15:13
Add a couple of comments to fts3_snippet.c. Closed-Leaf check-in: ad8df6c5 user: dan tags: fts3-experimental
10:58
Merge with latest trunk changes. check-in: 515cb3f4 user: dan tags: fts3-experimental
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3_snippet.c.

978
979
980
981
982
983
984






985
986
987
988
989
990
991
992
993




994
995
996
997
998
999
1000
1001
1002
1003
1004
1005





1006
1007
1008
1009
1010
1011
1012
....
1023
1024
1025
1026
1027
1028
1029











1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
....
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060



1061
1062
1063
1064
1065




1066
1067
1068
1069
1070
1071
1072
1073

1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088

1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099

















1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
....
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237

1238
1239
1240
1241
1242
1243
1244
  a += sqlite3Fts3GetVarint(a, &nDoc);
  *pnDoc = (u32)nDoc;

  if( paLen ) *paLen = a;
  return SQLITE_OK;
}







/*
** Per-phrase iteration state used by fts3MatchinfoLcs(). One of these is
** allocated for each phrase in the query and stepped through that phrase's
** position data for the current row.
*/
typedef struct LcsIterator {
  Fts3Expr *pExpr;                /* Phrase expression this iterator serves */
  char *pRead;                    /* Read cursor into the position data */
  int iPosOffset;                 /* Initial iPos: minus the token count of
                                  ** this phrase and all earlier phrases */
  int iCol;                       /* Column currently being iterated */
  int iPos;                       /* Current position (includes iPosOffset) */
} LcsIterator;





/*
** Sentinel stored in LcsIterator.iCol once an iterator has no more
** positions to visit in any column. Note: the definition must not end in
** a semicolon, otherwise the macro cannot be used inside an expression
** (e.g. a comparison or a function argument).
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF

/*
** Callback handed to fts3ExprIterate() by fts3MatchinfoLcs(). It stores the
** phrase expression pExpr in element iPhrase of the LcsIterator array that
** is passed through pCtx. Always returns SQLITE_OK.
*/
static int fts3MatchinfoLcsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Array of LcsIterator objects */
){
  LcsIterator *pSlot = &((LcsIterator *)pCtx)[iPhrase];
  pSlot->pExpr = pExpr;
  return SQLITE_OK;
}






static int fts3LcsIteratorAdvance(LcsIterator *pIter){
  char *pRead = pIter->pRead;
  sqlite3_int64 iRead;
  int rc = 0;

  pRead += sqlite3Fts3GetVarint(pRead, &iRead);
  if( iRead==0 ){
................................................................................
    pIter->iPos += (iRead-2);
  }

  pIter->pRead = pRead;
  return rc;
}
  











static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
  LcsIterator *aIter;
  int i;
  int iCol;
  int nToken = 0;

  /* Allocate and populate the array of LcsIterator objects. The array
  ** contains one element for each matchable phrase in the query.
  **/
  aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
  if( !aIter ) return SQLITE_NOMEM;
  memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
  (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);

  for(i=0; i<pInfo->nPhrase; i++){
    LcsIterator *pIter = &aIter[i];
    nToken -= pIter->pExpr->pPhrase->nToken;
    pIter->iPosOffset = nToken;
    pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1);
    if( pIter->pRead ){
      pIter->iPos = pIter->iPosOffset;
................................................................................
      fts3LcsIteratorAdvance(&aIter[i]);
    }else{
      pIter->iCol = LCS_ITERATOR_FINISHED;
    }
  }

  for(iCol=0; iCol<pInfo->nCol; iCol++){
    int nLcs = 0;
    int nLive = 0;




    for(i=0; i<pInfo->nPhrase; i++){
      assert( aIter[i].iCol>=iCol );
      if( aIter[i].iCol==iCol ) nLive++;
    }





    while( nLive>0 ){
      LcsIterator *pAdv = 0;
      int nThisLcs = 0;

      for(i=0; i<pInfo->nPhrase; i++){
        LcsIterator *pIter = &aIter[i];

        if( iCol!=pIter->iCol ){  

          nThisLcs = 0;
          continue;
        }

        if( pAdv==0 || pIter->iPos<pAdv->iPos ){
          pAdv = pIter;
        }

        if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
          nThisLcs++;
        }else{
          nThisLcs = 1;
        }

        if( nThisLcs>nLcs ) nLcs = nThisLcs;

      }
      if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
    }

    pInfo->aMatchinfo[iCol] = nLcs;
  }

  sqlite3_free(aIter);
  return SQLITE_OK;
}


















static int fts3MatchinfoValues(
  Fts3Cursor *pCsr,               /* FTS3 cursor object */
  int bGlobal,                    /* True to grab the global stats */
  MatchInfo *pInfo,               /* Matchinfo context object */
  const char *zArg                /* Matchinfo format string */
){
  int rc = SQLITE_OK;
  int i;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;

  sqlite3_stmt *pSelect = 0;

  for(i=0; rc==SQLITE_OK && zArg[i]; i++){

    switch( zArg[i] ){
      case FTS3_MATCHINFO_NPHRASE: 
        if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
        break;

      case FTS3_MATCHINFO_NCOL: 
        if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
        break;
        
      case FTS3_MATCHINFO_NDOC:
        if( bGlobal ){
          sqlite3_int64 nDoc;
          rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0);
................................................................................
  ** initialize those elements that are constant for every row.
  */
  if( pCsr->aMatchinfo==0 ){
    int nMatchinfo = 0;           /* Number of u32 elements in match-info */
    int nArg;                     /* Bytes in zArg */
    int i;                        /* Used to iterate through zArg */

    /* Load doclists for each phrase in the query. */
    pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
    sInfo.nPhrase = pCsr->nPhrase;


    for(i=0; zArg[i]; i++){
      nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
    }

    /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
    nArg = strlen(zArg);
    pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1);







>
>
>
>
>
>









>
>
>
>












>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>













<







 







|
|

>
>
>





>
>
>
>

|
|



<

>

|
<
<
|
|
|
<
|
|
|
|
|
<
|
>











>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>









<





|



|







 







|



>







978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
....
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068

1069
1070
1071
1072
1073
1074
1075
....
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103

1104
1105
1106
1107


1108
1109
1110

1111
1112
1113
1114
1115

1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154

1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
....
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
  a += sqlite3Fts3GetVarint(a, &nDoc);
  *pnDoc = (u32)nDoc;

  if( paLen ) *paLen = a;
  return SQLITE_OK;
}

/*
** State kept while walking the multi-column position-list that holds the
** hits for one phrase on one row. fts3MatchinfoLcs() allocates one instance
** per phrase in order to compute the values for a matchinfo()
** FTS3_MATCHINFO_LCS request.
*/
typedef struct LcsIterator {
  Fts3Expr *pExpr;                /* Phrase expression this iterator serves */
  char *pRead;                    /* Read cursor into the position data */
  int iPosOffset;                 /* Initial iPos: minus the token count of
                                  ** this phrase and all earlier phrases */
  int iCol;                       /* Column currently being iterated */
  int iPos;                       /* Current position (includes iPosOffset) */
} LcsIterator;

/*
** If LcsIterator.iCol is set to the following value, the iterator has
** finished iterating through all offsets for all columns.
**
** The definition deliberately has no trailing semicolon so that the macro
** can also be used inside expressions (comparisons, function arguments).
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF

/*
** Callback handed to fts3ExprIterate() by fts3MatchinfoLcs(). It stores the
** phrase expression pExpr in element iPhrase of the LcsIterator array that
** is passed through pCtx. Always returns SQLITE_OK.
*/
static int fts3MatchinfoLcsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Array of LcsIterator objects */
){
  LcsIterator *pSlot = &((LcsIterator *)pCtx)[iPhrase];
  pSlot->pExpr = pExpr;
  return SQLITE_OK;
}

/*
** Advance the iterator passed as an argument to the next position. Return
** 1 if the iterator is at EOF or if it now points to the start of the
** position list for the next column.
*/
static int fts3LcsIteratorAdvance(LcsIterator *pIter){
  char *pRead = pIter->pRead;
  sqlite3_int64 iRead;
  int rc = 0;

  pRead += sqlite3Fts3GetVarint(pRead, &iRead);
  if( iRead==0 ){
................................................................................
    pIter->iPos += (iRead-2);
  }

  pIter->pRead = pRead;
  return rc;
}
  
/*
** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. 
**
** If the call is successful, the longest-common-substring lengths for each
** column are written into the first nCol elements of the pInfo->aMatchinfo[] 
** array before returning. SQLITE_OK is returned in this case.
**
** Otherwise, if an error occurs, an SQLite error code is returned and the
** data written to the first nCol elements of pInfo->aMatchinfo[] is 
** undefined.
*/
static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
  LcsIterator *aIter;
  int i;
  int iCol;
  int nToken = 0;

  /* Allocate and populate the array of LcsIterator objects. The array
  ** contains one element for each matchable phrase in the query.
  **/
  aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
  if( !aIter ) return SQLITE_NOMEM;
  memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
  (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);

  for(i=0; i<pInfo->nPhrase; i++){
    LcsIterator *pIter = &aIter[i];
    nToken -= pIter->pExpr->pPhrase->nToken;
    pIter->iPosOffset = nToken;
    pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1);
    if( pIter->pRead ){
      pIter->iPos = pIter->iPosOffset;
................................................................................
      fts3LcsIteratorAdvance(&aIter[i]);
    }else{
      pIter->iCol = LCS_ITERATOR_FINISHED;
    }
  }

  for(iCol=0; iCol<pInfo->nCol; iCol++){
    int nLcs = 0;                 /* LCS value for this column */
    int nLive = 0;                /* Number of iterators in aIter not at EOF */

    /* Loop through the iterators in aIter[]. Set nLive to the number of
    ** iterators that point to a position-list corresponding to column iCol.
    */
    for(i=0; i<pInfo->nPhrase; i++){
      assert( aIter[i].iCol>=iCol );
      if( aIter[i].iCol==iCol ) nLive++;
    }

    /* The following loop runs until all iterators in aIter[] have finished
    ** iterating through positions in column iCol. Exactly one of the 
    ** iterators is advanced each time the body of the loop is run.
    */
    while( nLive>0 ){
      LcsIterator *pAdv = 0;      /* The iterator to advance by one position */
      int nThisLcs = 0;           /* LCS for the current iterator positions */

      for(i=0; i<pInfo->nPhrase; i++){
        LcsIterator *pIter = &aIter[i];

        if( iCol!=pIter->iCol ){  
          /* This iterator is already at EOF for this column. */
          nThisLcs = 0;
        }else{


          if( pAdv==0 || pIter->iPos<pAdv->iPos ){
            pAdv = pIter;
          }

          if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
            nThisLcs++;
          }else{
            nThisLcs = 1;
          }

          if( nThisLcs>nLcs ) nLcs = nThisLcs;
        }
      }
      if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
    }

    pInfo->aMatchinfo[iCol] = nLcs;
  }

  sqlite3_free(aIter);
  return SQLITE_OK;
}

/*
** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
** be returned by the matchinfo() function. Argument zArg contains the 
** format string passed as the second argument to matchinfo (or the
** default value "pcx" if no second argument was specified). The format
** string has already been validated and the pInfo->aMatchinfo[] array
** is guaranteed to be large enough for the output.
**
** If bGlobal is true, then populate all fields of the matchinfo() output.
** If it is false, then assume that those fields that do not change between
** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
** have already been populated.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs. If a value other than SQLITE_OK is returned, the contents of
** the pInfo->aMatchinfo[] buffer are undefined.
*/
static int fts3MatchinfoValues(
  Fts3Cursor *pCsr,               /* FTS3 cursor object */
  int bGlobal,                    /* True to grab the global stats */
  MatchInfo *pInfo,               /* Matchinfo context object */
  const char *zArg                /* Matchinfo format string */
){
  int rc = SQLITE_OK;
  int i;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;

  sqlite3_stmt *pSelect = 0;

  for(i=0; rc==SQLITE_OK && zArg[i]; i++){

    switch( zArg[i] ){
      case FTS3_MATCHINFO_NPHRASE:
        if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
        break;

      case FTS3_MATCHINFO_NCOL:
        if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
        break;
        
      case FTS3_MATCHINFO_NDOC:
        if( bGlobal ){
          sqlite3_int64 nDoc;
          rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0);
................................................................................
  ** initialize those elements that are constant for every row.
  */
  if( pCsr->aMatchinfo==0 ){
    int nMatchinfo = 0;           /* Number of u32 elements in match-info */
    int nArg;                     /* Bytes in zArg */
    int i;                        /* Used to iterate through zArg */

    /* Determine the number of phrases in the query */
    pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
    sInfo.nPhrase = pCsr->nPhrase;

    /* Determine the number of integers in the buffer returned by this call. */
    for(i=0; zArg[i]; i++){
      nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
    }

    /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
    nArg = strlen(zArg);
    pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1);