Index: ext/fts3/fts3.c
==================================================================
--- ext/fts3/fts3.c
+++ ext/fts3/fts3.c
@@ -467,10 +467,11 @@
   }
   sqlite3_free(p->zSegmentsTbl);
   sqlite3_free(p->zReadExprlist);
   sqlite3_free(p->zWriteExprlist);
   sqlite3_free(p->zContentTbl);
+  sqlite3_free(p->zLanguageid);
 
   /* Invoke the tokenizer destructor to free the tokenizer. */
   p->pTokenizer->pModule->xDestroy(p->pTokenizer);
 
   sqlite3_free(p);
@@ -543,11 +544,13 @@
   if( *pRc==SQLITE_OK ){
     int i;                        /* Iterator variable */
     int rc;                       /* Return code */
     char *zSql;                   /* SQL statement passed to declare_vtab() */
     char *zCols;                  /* List of user defined columns */
+    const char *zLanguageid;
 
+    zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid");
     sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
 
     /* Create a list of user columns for the virtual table */
     zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
     for(i=1; zCols && i<p->nColumn; i++){
@@ -554,11 +557,12 @@
       zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
     }
 
     /* Create the whole "CREATE TABLE" statement to pass to SQLite */
     zSql = sqlite3_mprintf(
-        "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName
+        "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)", 
+        zCols, p->zName, zLanguageid
     );
     if( !zCols || !zSql ){
       rc = SQLITE_NOMEM;
     }else{
       rc = sqlite3_declare_vtab(p->db, zSql);
@@ -583,18 +587,22 @@
   int rc = SQLITE_OK;             /* Return code */
   int i;                          /* Iterator variable */
   sqlite3 *db = p->db;            /* The database connection */
 
   if( p->zContentTbl==0 ){
+    const char *zLanguageid = p->zLanguageid;
     char *zContentCols;           /* Columns of %_content table */
 
     /* Create a list of user columns for the content table */
     zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
     for(i=0; zContentCols && i<p->nColumn; i++){
       char *z = p->azColumn[i];
       zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
     }
+    if( zLanguageid && zContentCols ){
+      zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid);
+    }
     if( zContentCols==0 ) rc = SQLITE_NOMEM;
   
     /* Create the content table */
     fts3DbExec(&rc, db, 
        "CREATE TABLE %Q.'%q_content'(%s)",
@@ -790,18 +798,24 @@
     }
     fts3Appendf(pRc, &zRet, "docid");
     for(i=0; i<p->nColumn; i++){
       fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
     }
+    if( p->zLanguageid ){
+      fts3Appendf(pRc, &zRet, ", x.%Q", "langid");
+    }
     sqlite3_free(zFree);
   }else{
     fts3Appendf(pRc, &zRet, "rowid");
     for(i=0; i<p->nColumn; i++){
       fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
     }
+    if( p->zLanguageid ){
+      fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid);
+    }
   }
-  fts3Appendf(pRc, &zRet, "FROM '%q'.'%q%s' AS x", 
+  fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", 
       p->zDb,
       (p->zContentTbl ? p->zContentTbl : p->zName),
       (p->zContentTbl ? "" : "_content")
   );
   return zRet;
@@ -840,10 +854,13 @@
   }
   fts3Appendf(pRc, &zRet, "?");
   for(i=0; i<p->nColumn; i++){
     fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
   }
+  if( p->zLanguageid ){
+    fts3Appendf(pRc, &zRet, ", ?");
+  }
   sqlite3_free(zFree);
   return zRet;
 }
 
 /*
@@ -1055,10 +1072,11 @@
   int bDescIdx = 0;               /* True to store descending indexes */
   char *zPrefix = 0;              /* Prefix parameter value (or NULL) */
   char *zCompress = 0;            /* compress=? parameter (or NULL) */
   char *zUncompress = 0;          /* uncompress=? parameter (or NULL) */
   char *zContent = 0;             /* content=? parameter (or NULL) */
+  char *zLanguageid = 0;          /* languageid=? parameter (or NULL) */
 
   assert( strlen(argv[0])==4 );
   assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
        || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4)
   );
@@ -1104,11 +1122,12 @@
         { "matchinfo",   9 },     /* 0 -> MATCHINFO */
         { "prefix",      6 },     /* 1 -> PREFIX */
         { "compress",    8 },     /* 2 -> COMPRESS */
         { "uncompress", 10 },     /* 3 -> UNCOMPRESS */
         { "order",       5 },     /* 4 -> ORDER */
-        { "content",     7 }      /* 5 -> CONTENT */
+        { "content",     7 },     /* 5 -> CONTENT */
+        { "languageid", 10 }      /* 6 -> LANGUAGEID */
       };
 
       int iOpt;
       if( !zVal ){
         rc = SQLITE_NOMEM;
@@ -1158,16 +1177,22 @@
                 rc = SQLITE_ERROR;
               }
               bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
               break;
 
-            default:              /* CONTENT */
-              assert( iOpt==5 );
-              sqlite3_free(zUncompress);
+            case 5:              /* CONTENT */
+              sqlite3_free(zContent);
               zContent = zVal;
               zVal = 0;
               break;
+
+            case 6:              /* LANGUAGEID */
+              assert( iOpt==6 );
+              sqlite3_free(zLanguageid);
+              zLanguageid = zVal;
+              zVal = 0;
+              break;
           }
         }
         sqlite3_free(zVal);
       }
     }
@@ -1193,12 +1218,24 @@
     zUncompress = 0;
     if( nCol==0 ){
       sqlite3_free((void*)aCol); 
       aCol = 0;
       rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);
+
+      /* If a languageid= option was specified, remove the language id
+      ** column from the aCol[] array. */ 
+      if( rc==SQLITE_OK && zLanguageid ){
+        int j;
+        for(j=0; j<nCol; j++){
+          if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){
+            memmove(&aCol[j], &aCol[j+1], (nCol-j) * sizeof(aCol[0]));
+            nCol--;
+            break;
+          }
+        }
+      }
     }
-    assert( rc!=SQLITE_OK || nCol>0 );
   }
   if( rc!=SQLITE_OK ) goto fts3_init_out;
 
   if( nCol==0 ){
     assert( nString==0 );
@@ -1241,11 +1278,13 @@
   p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
   p->bHasDocsize = (isFts4 && bNoDocsize==0);
   p->bHasStat = isFts4;
   p->bDescIdx = bDescIdx;
   p->zContentTbl = zContent;
+  p->zLanguageid = zLanguageid;
   zContent = 0;
+  zLanguageid = 0;
   TESTONLY( p->inTransaction = -1 );
   TESTONLY( p->mxSavepoint = -1 );
 
   p->aIndex = (struct Fts3Index *)&p->azColumn[nCol];
   memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex);
@@ -1304,10 +1343,11 @@
   sqlite3_free(zPrefix);
   sqlite3_free(aIndex);
   sqlite3_free(zCompress);
   sqlite3_free(zUncompress);
   sqlite3_free(zContent);
+  sqlite3_free(zLanguageid);
   sqlite3_free((void *)aCol);
   if( rc!=SQLITE_OK ){
     if( p ){
       fts3DisconnectMethod((sqlite3_vtab *)p);
     }else if( pTokenizer ){
@@ -1355,10 +1395,11 @@
 */
 static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
   Fts3Table *p = (Fts3Table *)pVTab;
   int i;                          /* Iterator variable */
   int iCons = -1;                 /* Index of constraint to use */
+  int iLangidCons = -1;           /* Index of langid=x constraint, if present */
 
   /* By default use a full table scan. This is an expensive option,
   ** so search through the constraints to see if a more efficient 
   ** strategy is possible.
   */
@@ -1367,11 +1408,12 @@
   for(i=0; i<pInfo->nConstraint; i++){
     struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
     if( pCons->usable==0 ) continue;
 
     /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
-    if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ 
+    if( iCons<0 
+     && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ 
      && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 )
     ){
       pInfo->idxNum = FTS3_DOCID_SEARCH;
       pInfo->estimatedCost = 1.0;
       iCons = i;
@@ -1390,18 +1432,27 @@
      && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn
     ){
       pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
       pInfo->estimatedCost = 2.0;
       iCons = i;
-      break;
+    }
+
+    /* Equality constraint on the langid column */
+    if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ 
+     && pCons->iColumn==p->nColumn + 2
+    ){
+      iLangidCons = i;
     }
   }
 
   if( iCons>=0 ){
     pInfo->aConstraintUsage[iCons].argvIndex = 1;
     pInfo->aConstraintUsage[iCons].omit = 1;
   } 
+  if( iLangidCons>=0 ){
+    pInfo->aConstraintUsage[iLangidCons].argvIndex = 2;
+  } 
 
   /* Regardless of the strategy selected, FTS can deliver rows in rowid (or
   ** docid) order. Both ascending and descending are possible. 
   */
   if( pInfo->nOrderBy==1 ){
@@ -2547,10 +2598,11 @@
 ** This function returns SQLITE_OK if successful, or an SQLite error code
 ** otherwise.
 */
 static int fts3SegReaderCursor(
   Fts3Table *p,                   /* FTS3 table handle */
+  int iLangid,                    /* Language id */
   int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
   int iLevel,                     /* Level of segments to scan */
   const char *zTerm,              /* Term to query for */
   int nTerm,                      /* Size of zTerm in bytes */
   int isPrefix,                   /* True for a prefix search */
@@ -2575,11 +2627,11 @@
     }
   }
 
   if( iLevel!=FTS3_SEGCURSOR_PENDING ){
     if( rc==SQLITE_OK ){
-      rc = sqlite3Fts3AllSegdirs(p, iIndex, iLevel, &pStmt);
+      rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt);
     }
 
     while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
       Fts3SegReader *pSeg = 0;
 
@@ -2620,10 +2672,11 @@
 ** Set up a cursor object for iterating through a full-text index or a 
 ** single level therein.
 */
 int sqlite3Fts3SegReaderCursor(
   Fts3Table *p,                   /* FTS3 table handle */
+  int iLangid,
   int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
   int iLevel,                     /* Level of segments to scan */
   const char *zTerm,              /* Term to query for */
   int nTerm,                      /* Size of zTerm in bytes */
   int isPrefix,                   /* True for a prefix search */
@@ -2644,11 +2697,11 @@
   assert( isScan==0 || p->aIndex==0 );
 
   memset(pCsr, 0, sizeof(Fts3MultiSegReader));
 
   return fts3SegReaderCursor(
-      p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
+      p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
   );
 }
 
 /*
 ** In addition to its current configuration, have the Fts3MultiSegReader
@@ -2656,15 +2709,18 @@
 **
 ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
 */
 static int fts3SegReaderCursorAddZero(
   Fts3Table *p,                   /* FTS virtual table handle */
+  int iLangid,
   const char *zTerm,              /* Term to scan doclist of */
   int nTerm,                      /* Number of bytes in zTerm */
   Fts3MultiSegReader *pCsr        /* Fts3MultiSegReader to modify */
 ){
-  return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr);
+  return fts3SegReaderCursor(p, 
+      iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr
+  );
 }
 
 /*
 ** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
 ** if isPrefix is true, to scan the doclist for all terms for which 
@@ -2696,32 +2752,35 @@
 
     if( isPrefix ){
       for(i=1; bFound==0 && i<p->nIndex; i++){
         if( p->aIndex[i].nPrefix==nTerm ){
           bFound = 1;
-          rc = sqlite3Fts3SegReaderCursor(
-              p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr);
+          rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, 
+              i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr
+          );
           pSegcsr->bLookup = 1;
         }
       }
 
       for(i=1; bFound==0 && i<p->nIndex; i++){
         if( p->aIndex[i].nPrefix==nTerm+1 ){
           bFound = 1;
-          rc = sqlite3Fts3SegReaderCursor(
-              p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
+          rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, 
+              i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
           );
           if( rc==SQLITE_OK ){
-            rc = fts3SegReaderCursorAddZero(p, zTerm, nTerm, pSegcsr);
+            rc = fts3SegReaderCursorAddZero(
+                p, pCsr->iLangid, zTerm, nTerm, pSegcsr
+            );
           }
         }
       }
     }
 
     if( bFound==0 ){
-      rc = sqlite3Fts3SegReaderCursor(
-          p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
+      rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, 
+          0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
       );
       pSegcsr->bLookup = !isPrefix;
     }
   }
 
@@ -2872,11 +2931,11 @@
 
   UNUSED_PARAMETER(idxStr);
   UNUSED_PARAMETER(nVal);
 
   assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
-  assert( nVal==0 || nVal==1 );
+  assert( nVal==0 || nVal==1 || nVal==2 );
   assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
   assert( p->pSegments==0 );
 
   /* In case the cursor has been used before, clear it now. */
   sqlite3_finalize(pCsr->pStmt);
@@ -2897,12 +2956,15 @@
 
     if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
       return SQLITE_NOMEM;
     }
 
-    rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->bHasStat, 
-        p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
+    pCsr->iLangid = 0;
+    if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);
+
+    rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
+        p->azColumn, p->bHasStat, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
     );
     if( rc!=SQLITE_OK ){
       if( rc==SQLITE_ERROR ){
         static const char *zErr = "malformed MATCH expression: [%s]";
         p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery);
@@ -2969,37 +3031,56 @@
 }
 
 /* 
 ** This is the xColumn method, called by SQLite to request a value from
 ** the row that the supplied cursor currently points to.
+**
+** If:
+**
+**   (iCol <  p->nColumn)   -> The value of the iCol'th user column.
+**   (iCol == p->nColumn)   -> Magic column with the same name as the table.
+**   (iCol == p->nColumn+1) -> Docid column
+**   (iCol == p->nColumn+2) -> Langid column
 */
 static int fts3ColumnMethod(
   sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
-  sqlite3_context *pContext,      /* Context for sqlite3_result_xxx() calls */
+  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
   int iCol                        /* Index of column to read value from */
 ){
   int rc = SQLITE_OK;             /* Return Code */
   Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
   Fts3Table *p = (Fts3Table *)pCursor->pVtab;
 
   /* The column value supplied by SQLite must be in range. */
-  assert( iCol>=0 && iCol<=p->nColumn+1 );
+  assert( iCol>=0 && iCol<=p->nColumn+2 );
 
   if( iCol==p->nColumn+1 ){
     /* This call is a request for the "docid" column. Since "docid" is an 
     ** alias for "rowid", use the xRowid() method to obtain the value.
     */
-    sqlite3_result_int64(pContext, pCsr->iPrevId);
+    sqlite3_result_int64(pCtx, pCsr->iPrevId);
   }else if( iCol==p->nColumn ){
     /* The extra column whose name is the same as the table.
-    ** Return a blob which is a pointer to the cursor.
-    */
-    sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
+    ** Return a blob which is a pointer to the cursor.  */
+    sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
+  }else if( iCol==p->nColumn+2 && pCsr->pExpr ){
+    sqlite3_result_int64(pCtx, pCsr->iLangid);
   }else{
+    /* The requested column is either a user column (one that contains 
+    ** indexed data), or the language-id column.  */
     rc = fts3CursorSeek(0, pCsr);
-    if( rc==SQLITE_OK && sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
-      sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1));
+
+    if( rc==SQLITE_OK ){
+      if( iCol==p->nColumn+2 ){
+        int iLangid = 0;
+        if( p->zLanguageid ){
+          iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1);
+        }
+        sqlite3_result_int(pCtx, iLangid);
+      }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
+        sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
+      }
     }
   }
 
   assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
   return rc;

Index: ext/fts3/fts3Int.h
==================================================================
--- ext/fts3/fts3Int.h
+++ ext/fts3/fts3Int.h
@@ -190,15 +190,16 @@
   const char *zName;              /* virtual table name */
   int nColumn;                    /* number of named columns in virtual table */
   char **azColumn;                /* column names.  malloced */
   sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
   char *zContentTbl;              /* content=xxx option, or NULL */
+  char *zLanguageid;              /* languageid=xxx option, or NULL */
 
   /* Precompiled statements used by the implementation. Each of these 
   ** statements is run and reset within a single virtual table API call. 
   */
-  sqlite3_stmt *aStmt[27];
+  sqlite3_stmt *aStmt[28];
 
   char *zReadExprlist;
   char *zWriteExprlist;
 
   int nNodeSize;                  /* Soft limit for node size */
@@ -209,16 +210,16 @@
   char *zSegmentsTbl;             /* Name of %_segments table */
   sqlite3_blob *pSegments;        /* Blob handle open on %_segments table */
 
   /* TODO: Fix the first paragraph of this comment.
   **
-  ** The following hash table is used to buffer pending index updates during
-  ** transactions. Variable nPendingData estimates the memory size of the 
-  ** pending data, including hash table overhead, but not malloc overhead. 
-  ** When nPendingData exceeds nMaxPendingData, the buffer is flushed 
-  ** automatically. Variable iPrevDocid is the docid of the most recently
-  ** inserted record.
+  ** The following array of hash tables is used to buffer pending index 
+  ** updates during transactions. Variable nPendingData estimates the memory 
+  ** size of the pending data, including hash table overhead, not including
+  ** malloc overhead.  When nPendingData exceeds nMaxPendingData, the buffer 
+  ** is flushed automatically. Variable iPrevDocid is the docid of the most 
+  ** recently inserted record.
   **
   ** A single FTS4 table may have multiple full-text indexes. For each index
   ** there is an entry in the aIndex[] array. Index 0 is an index of all the
   ** terms that appear in the document set. Each subsequent index in aIndex[]
   ** is an index of prefixes of a specific length.
@@ -229,16 +230,17 @@
     Fts3Hash hPending;            /* Pending terms table for this index */
   } *aIndex;
   int nMaxPendingData;            /* Max pending data before flush to disk */
   int nPendingData;               /* Current bytes of pending data */
   sqlite_int64 iPrevDocid;        /* Docid of most recently inserted document */
+  int iPrevLangid;                /* Langid of recently inserted document */
 
 #if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
   /* State variables used for validating that the transaction control
   ** methods of the virtual table are called at appropriate times.  These
-  ** values do not contribution to the FTS computation; they are used for
-  ** verifying the SQLite core.
+  ** values do not contribute to FTS functionality; they are used for
+  ** verifying the operation of the SQLite core.
   */
   int inTransaction;     /* True after xBegin but before xCommit/xRollback */
   int mxSavepoint;       /* Largest valid xSavepoint integer */
 #endif
 };
@@ -253,10 +255,11 @@
   i16 eSearch;                    /* Search strategy (see below) */
   u8 isEof;                       /* True if at End Of Results */
   u8 isRequireSeek;               /* True if must seek pStmt to %_content row */
   sqlite3_stmt *pStmt;            /* Prepared statement in use by the cursor */
   Fts3Expr *pExpr;                /* Parsed MATCH query string */
+  int iLangid;                    /* Language being queried for */
   int nPhrase;                    /* Number of matchable phrases in query */
   Fts3DeferredToken *pDeferred;   /* Deferred search tokens, if any */
   sqlite3_int64 iPrevId;          /* Previous id read from aDoclist */
   char *pNextId;                  /* Pointer into the body of aDoclist */
   char *aDoclist;                 /* List of docids for full-text queries */
@@ -404,11 +407,11 @@
 int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
   sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
 int sqlite3Fts3SegReaderPending(
   Fts3Table*,int,const char*,int,int,Fts3SegReader**);
 void sqlite3Fts3SegReaderFree(Fts3SegReader *);
-int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, sqlite3_stmt **);
+int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
 int sqlite3Fts3ReadLock(Fts3Table *);
 int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
 
 int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
 int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);
@@ -425,12 +428,12 @@
 
 int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
 int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
 void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
 
-int sqlite3Fts3SegReaderCursor(
-    Fts3Table *, int, int, const char *, int, int, int, Fts3MultiSegReader *);
+int sqlite3Fts3SegReaderCursor(Fts3Table *, 
+    int, int, int, const char *, int, int, int, Fts3MultiSegReader *);
 
 /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
 #define FTS3_SEGMENT_REQUIRE_POS   0x00000001
 #define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
@@ -493,18 +496,22 @@
   const char *, const char *, int, int
 );
 void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
 
 /* fts3_expr.c */
-int sqlite3Fts3ExprParse(sqlite3_tokenizer *, 
+int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
   char **, int, int, int, const char *, int, Fts3Expr **
 );
 void sqlite3Fts3ExprFree(Fts3Expr *);
 #ifdef SQLITE_TEST
 int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
 int sqlite3Fts3InitTerm(sqlite3 *db);
 #endif
+
+int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
+  sqlite3_tokenizer_cursor **
+);
 
 /* fts3_aux.c */
 int sqlite3Fts3InitAux(sqlite3 *db);
 
 void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);

Index: ext/fts3/fts3_aux.c
==================================================================
--- ext/fts3/fts3_aux.c
+++ ext/fts3/fts3_aux.c
@@ -374,11 +374,11 @@
     pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iIdx]));
     pCsr->nStop = sqlite3_value_bytes(apVal[iIdx]);
     if( pCsr->zStop==0 ) return SQLITE_NOMEM;
   }
 
-  rc = sqlite3Fts3SegReaderCursor(pFts3, 0, FTS3_SEGCURSOR_ALL,
+  rc = sqlite3Fts3SegReaderCursor(pFts3, 0, 0, FTS3_SEGCURSOR_ALL,
       pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
   );
   if( rc==SQLITE_OK ){
     rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
   }

Index: ext/fts3/fts3_expr.c
==================================================================
--- ext/fts3/fts3_expr.c
+++ ext/fts3/fts3_expr.c
@@ -90,10 +90,11 @@
 **   zero.
 */
 typedef struct ParseContext ParseContext;
 struct ParseContext {
   sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
+  int iLangid;                        /* Language id used with tokenizer */
   const char **azCol;                 /* Array of column names for fts3 table */
   int bFts4;                          /* True to allow FTS4-only syntax */
   int nCol;                           /* Number of entries in azCol[] */
   int iDefaultCol;                    /* Default column to query */
   int isNot;                          /* True if getNextNode() sees a unary - */
@@ -125,10 +126,37 @@
   void *pRet = sqlite3_malloc(nByte);
   if( pRet ) memset(pRet, 0, nByte);
   return pRet;
 }
 
+int sqlite3Fts3OpenTokenizer(
+  sqlite3_tokenizer *pTokenizer,
+  int iLangid,
+  const char *z,
+  int n,
+  sqlite3_tokenizer_cursor **ppCsr
+){
+  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
+  sqlite3_tokenizer_cursor *pCsr = 0;
+  int rc;
+
+  rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
+  assert( rc==SQLITE_OK || pCsr==0 );
+  if( rc==SQLITE_OK ){
+    pCsr->pTokenizer = pTokenizer;
+    if( pModule->iVersion>=1 ){
+      rc = pModule->xLanguageid(pCsr, iLangid);
+      if( rc!=SQLITE_OK ){
+        pModule->xClose(pCsr);
+        pCsr = 0;
+      }
+    }
+  }
+  *ppCsr = pCsr;
+  return rc;
+}
+
 
 /*
 ** Extract the next token from buffer z (length n) using the tokenizer
 ** and other information (column names etc.) in pParse. Create an Fts3Expr
 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
@@ -152,19 +180,17 @@
   int rc;
   sqlite3_tokenizer_cursor *pCursor;
   Fts3Expr *pRet = 0;
   int nConsumed = 0;
 
-  rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
+  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
   if( rc==SQLITE_OK ){
     const char *zToken;
     int nToken, iStart, iEnd, iPosition;
     int nByte;                               /* total space to allocate */
 
-    pCursor->pTokenizer = pTokenizer;
     rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
-
     if( rc==SQLITE_OK ){
       nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
       pRet = (Fts3Expr *)fts3MallocZero(nByte);
       if( !pRet ){
         rc = SQLITE_NOMEM;
@@ -266,14 +292,14 @@
   **
   ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
   ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
   ** structures.
   */
-  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
+  rc = sqlite3Fts3OpenTokenizer(
+      pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
   if( rc==SQLITE_OK ){
     int ii;
-    pCursor->pTokenizer = pTokenizer;
     for(ii=0; rc==SQLITE_OK; ii++){
       const char *zByte;
       int nByte, iBegin, iEnd, iPos;
       rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
       if( rc==SQLITE_OK ){
@@ -743,10 +769,11 @@
 ** specified as part of the query string), or -1 if tokens may by default
 ** match any table column.
 */
 int sqlite3Fts3ExprParse(
   sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
+  int iLangid,                        /* Language id for tokenizer */
   char **azCol,                       /* Array of column names for fts3 table */
   int bFts4,                          /* True to allow FTS4-only syntax */
   int nCol,                           /* Number of entries in azCol[] */
   int iDefaultCol,                    /* Default column to query */
   const char *z, int n,               /* Text of MATCH query */
@@ -753,15 +780,17 @@
   Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
 ){
   int nParsed;
   int rc;
   ParseContext sParse;
+
+  memset(&sParse, 0, sizeof(ParseContext));
   sParse.pTokenizer = pTokenizer;
+  sParse.iLangid = iLangid;
   sParse.azCol = (const char **)azCol;
   sParse.nCol = nCol;
   sParse.iDefaultCol = iDefaultCol;
-  sParse.nNest = 0;
   sParse.bFts4 = bFts4;
   if( z==0 ){
     *ppExpr = 0;
     return SQLITE_OK;
   }
@@ -948,11 +977,11 @@
   for(ii=0; ii<nCol; ii++){
     azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
   }
 
   rc = sqlite3Fts3ExprParse(
-      pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
+      pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
   );
   if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
     sqlite3_result_error(context, "Error parsing expression", -1);
   }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
     sqlite3_result_error_nomem(context);

Index: ext/fts3/fts3_snippet.c
==================================================================
--- ext/fts3/fts3_snippet.c
+++ ext/fts3/fts3_snippet.c
@@ -530,10 +530,11 @@
 ** is no way for fts3BestSnippet() to know whether or not the document 
 ** actually contains terms that follow the final highlighted term. 
 */
 static int fts3SnippetShift(
   Fts3Table *pTab,                /* FTS3 table snippet comes from */
+  int iLangid,                    /* Language id to use in tokenizing */
   int nSnippet,                   /* Number of tokens desired for snippet */
   const char *zDoc,               /* Document text to extract snippet from */
   int nDoc,                       /* Size of buffer zDoc in bytes */
   int *piPos,                     /* IN/OUT: First token of snippet */
   u64 *pHlmask                    /* IN/OUT: Mask of tokens to highlight */
@@ -565,15 +566,14 @@
       pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
 
       /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
       ** or more tokens in zDoc/nDoc.
       */
-      rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
+      rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
       if( rc!=SQLITE_OK ){
         return rc;
       }
-      pC->pTokenizer = pTab->pTokenizer;
       while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
         const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
         rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
       }
       pMod->xClose(pC);
@@ -629,15 +629,14 @@
   }
   nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
 
   /* Open a token cursor on the document. */
   pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
-  rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
+  rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
   if( rc!=SQLITE_OK ){
     return rc;
   }
-  pC->pTokenizer = pTab->pTokenizer;
 
   while( rc==SQLITE_OK ){
     int iBegin;                   /* Offset in zDoc of start of token */
     int iFin;                     /* Offset in zDoc of end of token */
     int isHighlight;              /* True for highlighted terms */
@@ -655,11 +654,13 @@
     }
     if( iCurrent<iPos ){ continue; }
 
     if( !isShiftDone ){
       int n = nDoc - iBegin;
-      rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
+      rc = fts3SnippetShift(
+          pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
+      );
       isShiftDone = 1;
 
       /* Now that the shift has been done, check if the initial "..." are
       ** required. They are required if (a) this is not the first fragment,
       ** or (b) this fragment does not begin at position 0 of its column. 
@@ -1388,13 +1389,14 @@
       rc = SQLITE_NOMEM;
       goto offsets_out;
     }
 
     /* Initialize a tokenizer iterator to iterate through column iCol. */
-    rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
+    rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
+        zDoc, nDoc, &pC
+    );
     if( rc!=SQLITE_OK ) goto offsets_out;
-    pC->pTokenizer = pTab->pTokenizer;
 
     rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
     while( rc==SQLITE_OK ){
       int i;                      /* Used to loop through terms */
       int iMinPos = 0x7FFFFFFF;   /* Position of next token */

Index: ext/fts3/fts3_term.c
==================================================================
--- ext/fts3/fts3_term.c
+++ ext/fts3/fts3_term.c
@@ -269,11 +269,11 @@
   memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
 
   pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
   pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
 
-  rc = sqlite3Fts3SegReaderCursor(pFts3, p->iIndex, FTS3_SEGCURSOR_ALL,
+  rc = sqlite3Fts3SegReaderCursor(pFts3, 0, p->iIndex, FTS3_SEGCURSOR_ALL,
       pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
   );
   if( rc==SQLITE_OK ){
     rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
   }

Index: ext/fts3/fts3_test.c
==================================================================
--- ext/fts3/fts3_test.c
+++ ext/fts3/fts3_test.c
@@ -11,10 +11,13 @@
 ******************************************************************************
 **
 ** This file is not part of the production FTS code. It is only used for
 ** testing. It contains a Tcl command that can be used to test if a document
 ** matches an FTS NEAR expression.
+**
+** As of March 2012, it also contains a version 1 tokenizer used for testing
+** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly.
 */
 
 #include <tcl.h>
 #include <string.h>
 #include <assert.h>
@@ -312,13 +315,209 @@
   Tcl_DecrRefCount(pRet);
 #endif
   return TCL_OK;
 }
 
+/**************************************************************************
+** Beginning of test tokenizer code.
+**
+** For language 0, this tokenizer is similar to the default 'simple' 
+** tokenizer. For other languages L, the following:
+**
+**   * Odd numbered languages are case-sensitive. Even numbered 
+**     languages are not.
+**
+**   * Language ids 100 or greater are considered an error.
+**
+** The implementation assumes that the input contains only ASCII characters
+** (i.e. those that may be encoded in UTF-8 using a single byte).
+*/
+typedef struct test_tokenizer {
+  sqlite3_tokenizer base;
+} test_tokenizer;
+
+typedef struct test_tokenizer_cursor {
+  sqlite3_tokenizer_cursor base;
+  const char *aInput;          /* Input being tokenized */
+  int nInput;                  /* Size of the input in bytes */
+  int iInput;                  /* Current offset in aInput */
+  int iToken;                  /* Index of next token to be returned */
+  char *aBuffer;               /* Buffer containing current token */
+  int nBuffer;                 /* Number of bytes allocated at pToken */
+  int iLangid;                 /* Configured language id */
+} test_tokenizer_cursor;
+
+static int testTokenizerCreate(
+  int argc, const char * const *argv,
+  sqlite3_tokenizer **ppTokenizer
+){
+  test_tokenizer *pNew;
+
+  pNew = sqlite3_malloc(sizeof(test_tokenizer));
+  if( !pNew ) return SQLITE_NOMEM;
+  memset(pNew, 0, sizeof(test_tokenizer));
+
+  *ppTokenizer = (sqlite3_tokenizer *)pNew;
+  return SQLITE_OK;
+}
+
+static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
+  test_tokenizer *p = (test_tokenizer *)pTokenizer;
+  sqlite3_free(p);
+  return SQLITE_OK;
+}
+
+static int testTokenizerOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *pInput, int nBytes,        /* String to be tokenized */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  int rc = SQLITE_OK;                    /* Return code */
+  test_tokenizer_cursor *pCsr;           /* New cursor object */
+
+  UNUSED_PARAMETER(pTokenizer);
+
+  pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
+  if( pCsr==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    memset(pCsr, 0, sizeof(test_tokenizer_cursor));
+    pCsr->aInput = pInput;
+    if( nBytes<0 ){
+      pCsr->nInput = strlen(pInput);
+    }else{
+      pCsr->nInput = nBytes;
+    }
+  }
+
+  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
+  return rc;
+}
+
+static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
+  test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
+  sqlite3_free(pCsr->aBuffer);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+static int testIsTokenChar(char c){
+  return (c>='a' && c<='z') || (c>='A' && c<='Z');
+}
+static int testTolower(char c){
+  char ret = c;
+  if( ret>='A' && ret<='Z') ret = ret - ('A'-'a');
+  return ret;
+}
+
+static int testTokenizerNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by testTokenizerOpen */
+  const char **ppToken,               /* OUT: *ppToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
+  int rc = SQLITE_OK;
+  const char *p;
+  const char *pEnd;
+
+  p = &pCsr->aInput[pCsr->iInput];
+  pEnd = &pCsr->aInput[pCsr->nInput];
+
+  /* Skip past any white-space */
+  assert( p<=pEnd );
+  while( p<pEnd && testIsTokenChar(*p)==0 ) p++;
+
+  if( p==pEnd ){
+    rc = SQLITE_DONE;
+  }else{
+    /* Advance to the end of the token */
+    const char *pToken = p;
+    int nToken;
+    while( p<pEnd && testIsTokenChar(*p) ) p++;
+    nToken = p-pToken;
+
+    /* Copy the token into the buffer */
+    if( nToken>pCsr->nBuffer ){
+      sqlite3_free(pCsr->aBuffer);
+      pCsr->aBuffer = sqlite3_malloc(nToken);
+    }
+    if( pCsr->aBuffer==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      int i;
+
+      if( pCsr->iLangid & 0x00000001 ){
+        for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
+      }else{
+        for(i=0; i<nToken; i++) pCsr->aBuffer[i] = testTolower(pToken[i]);
+      }
+      pCsr->iToken++;
+      pCsr->iInput = p - pCsr->aInput;
+
+      *ppToken = pCsr->aBuffer;
+      *pnBytes = nToken;
+      *piStartOffset = pToken - pCsr->aInput;
+      *piEndOffset = p - pCsr->aInput;
+      *piPosition = pCsr->iToken;
+    }
+  }
+
+  return rc;
+}
+
+static int testTokenizerLanguage(
+  sqlite3_tokenizer_cursor *pCursor,
+  int iLangid
+){
+  int rc = SQLITE_OK;
+  test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
+  pCsr->iLangid = iLangid;
+  if( pCsr->iLangid>=100 ){
+    rc = SQLITE_ERROR;
+  }
+  return rc;
+}
+
+static int fts3_test_tokenizer_cmd(
+  ClientData clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+  static const sqlite3_tokenizer_module testTokenizerModule = {
+    1,
+    testTokenizerCreate,
+    testTokenizerDestroy,
+    testTokenizerOpen,
+    testTokenizerClose,
+    testTokenizerNext,
+    testTokenizerLanguage
+  };
+  const sqlite3_tokenizer_module *pPtr = &testTokenizerModule;
+  if( objc!=1 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "");
+    return TCL_ERROR;
+  }
+  Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
+    (const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *)
+  ));
+  return TCL_OK;
+}
+
+/* 
+** End of tokenizer code.
+**************************************************************************/ 
+
 int Sqlitetestfts3_Init(Tcl_Interp *interp){
   Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
   Tcl_CreateObjCommand(interp, 
       "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
   );
+  Tcl_CreateObjCommand(
+      interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
+  );
   return TCL_OK;
 }
 #endif                  /* ifdef SQLITE_TEST */

Index: ext/fts3/fts3_tokenizer.c
==================================================================
--- ext/fts3/fts3_tokenizer.c
+++ ext/fts3/fts3_tokenizer.c
@@ -286,15 +286,14 @@
   if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
     zErr = "error in xCreate()";
     goto finish;
   }
   pTokenizer->pModule = p;
-  if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
+  if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
     zErr = "error in xOpen()";
     goto finish;
   }
-  pCsr->pTokenizer = pTokenizer;
 
   while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
     Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
     Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
     zToken = &zInput[iStart];

Index: ext/fts3/fts3_tokenizer.h
==================================================================
--- ext/fts3/fts3_tokenizer.h
+++ ext/fts3/fts3_tokenizer.h
@@ -50,11 +50,11 @@
 typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
 
 struct sqlite3_tokenizer_module {
 
   /*
-  ** Structure version. Should always be set to 0.
+  ** Structure version. Should always be set to 0 or 1.
   */
   int iVersion;
 
   /*
   ** Create a new tokenizer. The values in the argv[] array are the
@@ -131,10 +131,19 @@
     const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
     int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
     int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
     int *piPosition      /* OUT: Number of tokens returned before this one */
   );
+
+  /***********************************************************************
+  ** Methods below this point are only available if iVersion>=1.
+  */
+
+  /* 
+  ** Configure the language id of a tokenizer cursor.
+  */
+  int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
 };
 
 struct sqlite3_tokenizer {
   const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
   /* Tokenizer implementations will typically add additional fields */

Index: ext/fts3/fts3_write.c
==================================================================
--- ext/fts3/fts3_write.c
+++ ext/fts3/fts3_write.c
@@ -230,10 +230,12 @@
 #define SQL_SELECT_ALL_PREFIX_LEVEL   24
 #define SQL_DELETE_ALL_TERMS_SEGDIR   25
 
 #define SQL_DELETE_SEGDIR_RANGE       26
 
+#define SQL_SELECT_ALL_LANGID         27
+
 /*
 ** This function is used to obtain an SQLite prepared statement handle
 ** for the statement identified by the second argument. If successful,
 ** *pp is set to the requested statement handle and SQLITE_OK returned.
 ** Otherwise, an SQLite error code is returned and *pp is set to 0.
@@ -283,10 +285,11 @@
 /* 23 */  "REPLACE INTO %Q.'%q_stat' VALUES(0,?)",
 /* 24 */  "",
 /* 25 */  "",
 
 /* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
+/* 27 */ "SELECT DISTINCT level / (1024 * ?) FROM %Q.'%q_segdir'",
 
   };
   int rc = SQLITE_OK;
   sqlite3_stmt *pStmt;
 
@@ -428,10 +431,23 @@
     rc = SQLITE_OK;
   }
 
   return rc;
 }
+
+static sqlite3_int64 getAbsoluteLevel(
+  Fts3Table *p, 
+  int iLangid, 
+  int iIndex, 
+  int iLevel
+){
+  assert( iLangid>=0 );
+  assert( p->nIndex>0 );
+  assert( iIndex>=0 && iIndex<p->nIndex );
+  return (iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL + iLevel;
+}
+
 
 /*
 ** Set *ppStmt to a statement handle that may be used to iterate through
 ** all rows in the %_segdir table, from oldest to newest. If successful,
 ** return SQLITE_OK. If an error occurs while preparing the statement, 
@@ -448,10 +464,11 @@
 **   3: end_block
 **   4: root
 */
 int sqlite3Fts3AllSegdirs(
   Fts3Table *p,                   /* FTS3 table */
+  int iLangid,                    /* Language being queried */
   int iIndex,                     /* Index for p->aIndex[] */
   int iLevel,                     /* Level to select */
   sqlite3_stmt **ppStmt           /* OUT: Compiled statement */
 ){
   int rc;
@@ -463,18 +480,20 @@
 
   if( iLevel<0 ){
     /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */
     rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0);
     if( rc==SQLITE_OK ){ 
-      sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
-      sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL-1);
+      sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
+      sqlite3_bind_int(pStmt, 2, 
+          getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
+      );
     }
   }else{
     /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
     rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
     if( rc==SQLITE_OK ){ 
-      sqlite3_bind_int(pStmt, 1, iLevel+iIndex*FTS3_SEGDIR_MAXLEVEL);
+      sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel));
     }
   }
   *ppStmt = pStmt;
   return rc;
 }
@@ -636,10 +655,11 @@
 **
 ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
 */
 static int fts3PendingTermsAdd(
   Fts3Table *p,                   /* Table into which text will be inserted */
+  int iLangid,                    /* Language id to use */
   const char *zText,              /* Text of document to be inserted */
   int iCol,                       /* Column into which text is being inserted */
   u32 *pnWord                     /* OUT: Number of tokens inserted */
 ){
   int rc;
@@ -665,15 +685,14 @@
   if( zText==0 ){
     *pnWord = 0;
     return SQLITE_OK;
   }
 
-  rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr);
+  rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr);
   if( rc!=SQLITE_OK ){
     return rc;
   }
-  pCsr->pTokenizer = pTokenizer;
 
   xNext = pModule->xNext;
   while( SQLITE_OK==rc
       && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
   ){
@@ -712,22 +731,32 @@
 /* 
 ** Calling this function indicates that subsequent calls to 
 ** fts3PendingTermsAdd() are to add term/position-list pairs for the
 ** contents of the document with docid iDocid.
 */
-static int fts3PendingTermsDocid(Fts3Table *p, sqlite_int64 iDocid){
+static int fts3PendingTermsDocid(
+  Fts3Table *p,                   /* Full-text table handle */
+  int iLangid,                    /* Language id of row being written */
+  sqlite_int64 iDocid             /* Docid of row being written */
+){
+  assert( iLangid>=0 );
+
   /* TODO(shess) Explore whether partially flushing the buffer on
   ** forced-flush would provide better performance.  I suspect that if
   ** we ordered the doclists by size and flushed the largest until the
   ** buffer was half empty, that would let the less frequent terms
   ** generate longer doclists.
   */
-  if( iDocid<=p->iPrevDocid || p->nPendingData>p->nMaxPendingData ){
+  if( iDocid<=p->iPrevDocid 
+   || p->iPrevLangid!=iLangid
+   || p->nPendingData>p->nMaxPendingData 
+  ){
     int rc = sqlite3Fts3PendingTermsFlush(p);
     if( rc!=SQLITE_OK ) return rc;
   }
   p->iPrevDocid = iDocid;
+  p->iPrevLangid = iLangid;
   return SQLITE_OK;
 }
 
 /*
 ** Discard the contents of the pending-terms hash tables. 
@@ -752,15 +781,20 @@
 ** pendingTerms hash table.
 **
 ** Argument apVal is the same as the similarly named argument passed to
 ** fts3InsertData(). Parameter iDocid is the docid of the new row.
 */
-static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){
+static int fts3InsertTerms(
+  Fts3Table *p, 
+  int iLangid, 
+  sqlite3_value **apVal, 
+  u32 *aSz
+){
   int i;                          /* Iterator variable */
   for(i=2; i<p->nColumn+2; i++){
     const char *zText = (const char *)sqlite3_value_text(apVal[i]);
-    int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
+    int rc = fts3PendingTermsAdd(p, iLangid, zText, i-2, &aSz[i-2]);
     if( rc!=SQLITE_OK ){
       return rc;
     }
     aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
   }
@@ -777,10 +811,11 @@
 **   apVal[2]                Left-most user-defined column
 **   ...
 **   apVal[p->nColumn+1]     Right-most user-defined column
 **   apVal[p->nColumn+2]     Hidden column with same name as table
 **   apVal[p->nColumn+3]     Hidden "docid" column (alias for rowid)
+**   apVal[p->nColumn+4]     Hidden languageid column
 */
 static int fts3InsertData(
   Fts3Table *p,                   /* Full-text table */
   sqlite3_value **apVal,          /* Array of values to insert */
   sqlite3_int64 *piDocid          /* OUT: Docid for row just inserted */
@@ -807,13 +842,17 @@
   **
   ** The statement features N '?' variables, where N is the number of user
   ** defined columns in the FTS3 table, plus one for the docid field.
   */
   rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
-  if( rc!=SQLITE_OK ){
-    return rc;
+  if( rc==SQLITE_OK && p->zLanguageid ){
+    rc = sqlite3_bind_int(
+        pContentInsert, p->nColumn+2, 
+        sqlite3_value_int(apVal[p->nColumn+4])
+    );
   }
+  if( rc!=SQLITE_OK ) return rc;
 
   /* There is a quirk here. The users INSERT statement may have specified
   ** a value for the "rowid" field, for the "docid" field, or for both.
   ** Which is a problem, since "rowid" and "docid" are aliases for the
   ** same value. For example:
@@ -868,10 +907,19 @@
   if( p->bHasStat ){
     fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
   }
   return rc;
 }
+
+/*
+**
+*/
+static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){
+  int iLangid = 0;
+  if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1);
+  return iLangid;
+}
 
 /*
 ** The first element in the apVal[] array is assumed to contain the docid
 ** (an integer) of a row about to be deleted. Remove all terms from the
 ** full-text index.
@@ -888,19 +936,21 @@
   if( *pRC ) return;
   rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
   if( rc==SQLITE_OK ){
     if( SQLITE_ROW==sqlite3_step(pSelect) ){
       int i;
-      for(i=1; i<=p->nColumn; i++){
+      int iLangid = langidFromSelect(p, pSelect);
+      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));
+      for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
         const char *zText = (const char *)sqlite3_column_text(pSelect, i);
-        rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
-        if( rc!=SQLITE_OK ){
-          sqlite3_reset(pSelect);
-          *pRC = rc;
-          return;
-        }
+        rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]);
         aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
+      }
+      if( rc!=SQLITE_OK ){
+        sqlite3_reset(pSelect);
+        *pRC = rc;
+        return;
       }
     }
     rc = sqlite3_reset(pSelect);
   }else{
     sqlite3_reset(pSelect);
@@ -910,11 +960,11 @@
 
 /*
 ** Forward declaration to account for the circular dependency between
 ** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
 */
-static int fts3SegmentMerge(Fts3Table *, int, int);
+static int fts3SegmentMerge(Fts3Table *, int, int, int);
 
 /* 
 ** This function allocates a new level iLevel index in the segdir table.
 ** Usually, indexes are allocated within a level sequentially starting
 ** with 0, so the allocated index is one greater than the value returned
@@ -929,22 +979,28 @@
 ** If successful, *piIdx is set to the allocated index slot and SQLITE_OK
 ** returned. Otherwise, an SQLite error code is returned.
 */
 static int fts3AllocateSegdirIdx(
   Fts3Table *p, 
+  int iLangid,                    /* Language id */
   int iIndex,                     /* Index for p->aIndex */
   int iLevel, 
   int *piIdx
 ){
   int rc;                         /* Return Code */
   sqlite3_stmt *pNextIdx;         /* Query for next idx at level iLevel */
   int iNext = 0;                  /* Result of query pNextIdx */
 
+  assert( iLangid>=0 );
+  assert( p->nIndex>=1 );
+
   /* Set variable iNext to the next available segdir index at level iLevel. */
   rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0);
   if( rc==SQLITE_OK ){
-    sqlite3_bind_int(pNextIdx, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel);
+    sqlite3_bind_int64(
+        pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
+    );
     if( SQLITE_ROW==sqlite3_step(pNextIdx) ){
       iNext = sqlite3_column_int(pNextIdx, 0);
     }
     rc = sqlite3_reset(pNextIdx);
   }
@@ -954,11 +1010,11 @@
     ** full, merge all segments in level iLevel into a single iLevel+1
     ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise,
     ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
     */
     if( iNext>=FTS3_MERGE_COUNT ){
-      rc = fts3SegmentMerge(p, iIndex, iLevel);
+      rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
       *piIdx = 0;
     }else{
       *piIdx = iNext;
     }
   }
@@ -2177,11 +2233,16 @@
 **
 ** Segment levels are stored in the 'level' column of the %_segdir table.
 **
 ** Return SQLITE_OK if successful, or an SQLite error code if not.
 */
-static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){
+static int fts3SegmentMaxLevel(
+  Fts3Table *p, 
+  int iLangid,
+  int iIndex, 
+  int *pnMax
+){
   sqlite3_stmt *pStmt;
   int rc;
   assert( iIndex>=0 && iIndex<p->nIndex );
 
   /* Set pStmt to the compiled version of:
@@ -2190,12 +2251,14 @@
   **
   ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
   */
   rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
   if( rc!=SQLITE_OK ) return rc;
-  sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
-  sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL - 1);
+  sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
+  sqlite3_bind_int(pStmt, 2, 
+      getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
+  );
   if( SQLITE_ROW==sqlite3_step(pStmt) ){
     *pnMax = sqlite3_column_int(pStmt, 0);
   }
   return sqlite3_reset(pStmt);
 }
@@ -2214,10 +2277,11 @@
 **
 ** SQLITE_OK is returned if successful, otherwise an SQLite error code.
 */
 static int fts3DeleteSegdir(
   Fts3Table *p,                   /* Virtual table handle */
+  int iLangid,                    /* Language id */
   int iIndex,                     /* Index for p->aIndex */
   int iLevel,                     /* Level of %_segdir entries to delete */
   Fts3SegReader **apSegment,      /* Array of SegReader objects */
   int nReader                     /* Size of array apSegment */
 ){
@@ -2241,17 +2305,19 @@
 
   assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL );
   if( iLevel==FTS3_SEGCURSOR_ALL ){
     rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0);
     if( rc==SQLITE_OK ){
-      sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
-      sqlite3_bind_int(pDelete, 2, (iIndex+1) * FTS3_SEGDIR_MAXLEVEL - 1);
+      sqlite3_bind_int(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
+      sqlite3_bind_int(pDelete, 2, 
+          getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
+      );
     }
   }else{
     rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0);
     if( rc==SQLITE_OK ){
-      sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel);
+      sqlite3_bind_int(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel));
     }
   }
 
   if( rc==SQLITE_OK ){
     sqlite3_step(pDelete);
@@ -2716,27 +2782,32 @@
 ** If this function is called with iLevel<0, but there is only one
 ** segment in the database, SQLITE_DONE is returned immediately. 
 ** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, 
 ** an SQLite error code is returned.
 */
-static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){
+static int fts3SegmentMerge(
+  Fts3Table *p, 
+  int iLangid,                    /* Language id to merge */
+  int iIndex,                     /* Index in p->aIndex[] to merge */
+  int iLevel                      /* Level to merge */
+){
   int rc;                         /* Return code */
   int iIdx = 0;                   /* Index of new segment */
   int iNewLevel = 0;              /* Level/index to create new segment at */
   SegmentWriter *pWriter = 0;     /* Used to write the new, merged, segment */
   Fts3SegFilter filter;           /* Segment term filter condition */
-  Fts3MultiSegReader csr;        /* Cursor to iterate through level(s) */
+  Fts3MultiSegReader csr;         /* Cursor to iterate through level(s) */
   int bIgnoreEmpty = 0;           /* True to ignore empty segments */
 
   assert( iLevel==FTS3_SEGCURSOR_ALL
        || iLevel==FTS3_SEGCURSOR_PENDING
        || iLevel>=0
   );
   assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
   assert( iIndex>=0 && iIndex<p->nIndex );
 
-  rc = sqlite3Fts3SegReaderCursor(p, iIndex, iLevel, 0, 0, 1, 0, &csr);
+  rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
   if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
 
   if( iLevel==FTS3_SEGCURSOR_ALL ){
     /* This call is to merge all segments in the database to a single
     ** segment. The level of the new segment is equal to the the numerically 
@@ -2744,28 +2815,28 @@
     ** index. The idx of the new segment is always 0.  */
     if( csr.nSegment==1 ){
       rc = SQLITE_DONE;
       goto finished;
     }
-    rc = fts3SegmentMaxLevel(p, iIndex, &iNewLevel);
+    rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel);
     bIgnoreEmpty = 1;
 
   }else if( iLevel==FTS3_SEGCURSOR_PENDING ){
-    iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL; 
-    rc = fts3AllocateSegdirIdx(p, iIndex, 0, &iIdx);
+    iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
+    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
   }else{
     /* This call is to merge all segments at level iLevel. find the next
     ** available segment index at level iLevel+1. The call to
     ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to 
     ** a single iLevel+2 segment if necessary.  */
-    rc = fts3AllocateSegdirIdx(p, iIndex, iLevel+1, &iIdx);
-    iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL + iLevel+1;
+    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
+    iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
   }
   if( rc!=SQLITE_OK ) goto finished;
   assert( csr.nSegment>0 );
-  assert( iNewLevel>=(iIndex*FTS3_SEGDIR_MAXLEVEL) );
-  assert( iNewLevel<((iIndex+1)*FTS3_SEGDIR_MAXLEVEL) );
+  assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
+  assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );
 
   memset(&filter, 0, sizeof(Fts3SegFilter));
   filter.flags = FTS3_SEGMENT_REQUIRE_POS;
   filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
 
@@ -2778,11 +2849,13 @@
   }
   if( rc!=SQLITE_OK ) goto finished;
   assert( pWriter );
 
   if( iLevel!=FTS3_SEGCURSOR_PENDING ){
-    rc = fts3DeleteSegdir(p, iIndex, iLevel, csr.apSegment, csr.nSegment);
+    rc = fts3DeleteSegdir(
+        p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment
+    );
     if( rc!=SQLITE_OK ) goto finished;
   }
   rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
 
  finished:
@@ -2797,11 +2870,11 @@
 */
 int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
   int rc = SQLITE_OK;
   int i;
   for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
-    rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_PENDING);
+    rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING);
     if( rc==SQLITE_DONE ) rc = SQLITE_OK;
   }
   sqlite3Fts3PendingTermsClear(p);
   return rc;
 }
@@ -2952,21 +3025,38 @@
   sqlite3_step(pStmt);
   *pRC = sqlite3_reset(pStmt);
   sqlite3_free(a);
 }
 
+/*
+** Merge the entire database so that there is one segment for each 
+** iIndex/iLangid combination.
+*/
 static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
-  int i;
   int bSeenDone = 0;
-  int rc = SQLITE_OK;
-  for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
-    rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_ALL);
-    if( rc==SQLITE_DONE ){
-      bSeenDone = 1;
-      rc = SQLITE_OK;
-    }
-  }
+  int rc;
+  sqlite3_stmt *pAllLangid = 0;
+
+  rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
+  if( rc==SQLITE_OK ){
+    int rc2;
+    sqlite3_bind_int(pAllLangid, 1, p->nIndex);
+    while( sqlite3_step(pAllLangid)==SQLITE_ROW ){
+      int i;
+      int iLangid = sqlite3_column_int(pAllLangid, 0);
+      for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
+        rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL);
+        if( rc==SQLITE_DONE ){
+          bSeenDone = 1;
+          rc = SQLITE_OK;
+        }
+      }
+    }
+    rc2 = sqlite3_reset(pAllLangid);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+
   sqlite3Fts3SegmentsClose(p);
   sqlite3Fts3PendingTermsClear(p);
 
   return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc;
 }
@@ -3013,15 +3103,16 @@
       }
     }
 
     while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
       int iCol;
-      rc = fts3PendingTermsDocid(p, sqlite3_column_int64(pStmt, 0));
+      int iLangid = langidFromSelect(p, pStmt);
+      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));
       aSz[p->nColumn] = 0;
       for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
         const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
-        rc = fts3PendingTermsAdd(p, z, iCol, &aSz[iCol]);
+        rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
         aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
       }
       if( p->bHasDocsize ){
         fts3InsertDocsize(&rc, p, aSz);
       }
@@ -3136,18 +3227,17 @@
   
     for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
       const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
       sqlite3_tokenizer_cursor *pTC = 0;
   
-      rc = pModule->xOpen(pT, zText, -1, &pTC);
+      rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
       while( rc==SQLITE_OK ){
         char const *zToken;       /* Buffer containing token */
         int nToken;               /* Number of bytes in token */
         int iDum1, iDum2;         /* Dummy variables */
         int iPos;                 /* Position of token in zText */
   
-        pTC->pTokenizer = pT;
         rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
         for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
           Fts3PhraseToken *pPT = pDef->pToken;
           if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
            && (pPT->bFirst==0 || iPos==0)
@@ -3243,12 +3333,10 @@
       ** delete the contents of all three tables and throw away any
       ** data in the pendingTerms hash table.  */
       rc = fts3DeleteAll(p, 1);
       *pnDoc = *pnDoc - 1;
     }else{
-      sqlite3_int64 iRemove = sqlite3_value_int64(pRowid);
-      rc = fts3PendingTermsDocid(p, iRemove);
       fts3DeleteTerms(&rc, p, pRowid, aSzDel);
       if( p->zContentTbl==0 ){
         fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
         if( sqlite3_changes(p->db) ) *pnDoc = *pnDoc - 1;
       }else{
@@ -3263,11 +3351,20 @@
   return rc;
 }
 
 /*
 ** This function does the work for the xUpdate method of FTS3 virtual
-** tables.
+** tables. The schema of the virtual table being:
+**
+**     CREATE TABLE <table name>( 
+**       <user COLUMns>,
+**       <table name> HIDDEN, 
+**       docid HIDDEN, 
+**       <langid> HIDDEN
+**     );
+**
+** 
 */
 int sqlite3Fts3UpdateMethod(
   sqlite3_vtab *pVtab,            /* FTS3 vtab object */
   int nArg,                       /* Size of argument array */
   sqlite3_value **apVal,          /* Array of arguments */
@@ -3280,10 +3377,14 @@
   u32 *aSzDel;                    /* Sizes of deleted documents */
   int nChng = 0;                  /* Net change in number of documents */
   int bInsertDone = 0;
 
   assert( p->pSegments==0 );
+  assert( 
+      nArg==1                     /* DELETE operations */
+   || nArg==(2 + p->nColumn + 3)  /* INSERT or UPDATE operations */
+  );
 
   /* Check for a "special" INSERT operation. One of the form:
   **
   **   INSERT INTO xyz(xyz) VALUES('command');
   */
@@ -3292,10 +3393,15 @@
    && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL 
   ){
     rc = fts3SpecialInsert(p, apVal[p->nColumn+2]);
     goto update_out;
   }
+
+  if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){
+    rc = SQLITE_CONSTRAINT;
+    goto update_out;
+  }
 
   /* Allocate space to hold the change in document sizes */
   aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 );
   if( aSzIns==0 ){
     rc = SQLITE_NOMEM;
@@ -3360,22 +3466,23 @@
     isRemove = 1;
   }
   
   /* If this is an INSERT or UPDATE operation, insert the new record. */
   if( nArg>1 && rc==SQLITE_OK ){
+    int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]);
     if( bInsertDone==0 ){
       rc = fts3InsertData(p, apVal, pRowid);
       if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
         rc = FTS_CORRUPT_VTAB;
       }
     }
     if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
-      rc = fts3PendingTermsDocid(p, *pRowid);
+      rc = fts3PendingTermsDocid(p, iLangid, *pRowid);
     }
     if( rc==SQLITE_OK ){
       assert( p->iPrevDocid==*pRowid );
-      rc = fts3InsertTerms(p, apVal, aSzIns);
+      rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
     }
     if( p->bHasDocsize ){
       fts3InsertDocsize(&rc, p, aSzIns);
     }
     nChng++;

ADDED   test/fts4langid.test
Index: test/fts4langid.test
==================================================================
--- /dev/null
+++ test/fts4langid.test
@@ -0,0 +1,385 @@
+# 2012 March 01
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file implements regression tests for SQLite library.  The
+# focus of this script is testing the languageid=xxx FTS4 option.
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+set ::testprefix fts4content
+
+# If SQLITE_ENABLE_FTS3 is defined, omit this file.
+ifcapable !fts3 {
+  finish_test
+  return
+}
+
+set ::testprefix fts4langid
+
+#---------------------------------------------------------------------------
+# Test plan:
+#
+#   1.* - Warm-body tests created for specific purposes during development.
+#         Passing these doesn't really prove much.
+#
+#   2.1.* - Test that FTS queries only ever return rows associated with
+#           the requested language.
+#
+#   2.2.* - Same as 2.1.*, after an 'optimize' command.
+#
+#   2.3.* - Same as 2.1.*, after a 'rebuild' command.
+#
+#   3.* - Tests with content= tables. Both where there is a real 
+#         underlying content table and where there is not.
+#
+#   4.* - Test that if one is provided, the tokenizer xLanguage method
+#         is called to configure the tokenizer before tokenizing query
+#         or document text.
+#
+#   5.* - Test the fts4aux table when the associated FTS4 table contains
+#         multiple languages.
+#
+
+do_execsql_test 1.1 {
+  CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id);
+}
+
+do_execsql_test 1.2 {
+  SELECT sql FROM sqlite_master WHERE name = 't1_content';
+} {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}}
+
+do_execsql_test 1.3 {SELECT docid FROM t1} {}
+do_execsql_test 1.4 {SELECT lang_id FROM t1} {}
+
+do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')}
+do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0}
+
+do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)}
+do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4}
+
+do_execsql_test 1.9  {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')}
+do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0}
+
+do_execsql_test 1.11 {
+  CREATE VIRTUAL TABLE t2 USING fts4;
+  INSERT INTO t2 VALUES('abc');
+} 
+do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1
+
+do_execsql_test 1.13 {
+  DROP TABLE t1;
+  CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
+  INSERT INTO t1(content)          VALUES('a b c');
+  INSERT INTO t1(content, lang_id) VALUES('a b c', 1);
+}
+
+do_execsql_test 1.14 {
+  SELECT rowid FROM t1 WHERE t1 MATCH 'b';
+} {1}
+do_execsql_test 1.15 {
+  SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0;
+} {1}
+
+do_execsql_test 1.16 {
+  SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1;
+} {2}
+
+do_catchsql_test 1.17 {
+  INSERT INTO t1(content, lang_id) VALUES('123', -1);
+} {1 {constraint failed}}
+
+do_execsql_test 1.18 {
+  DROP TABLE t1;
+  CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
+  INSERT INTO t1(content, lang_id) VALUES('A', 13);
+  INSERT INTO t1(content, lang_id) VALUES('B', 13);
+  INSERT INTO t1(content, lang_id) VALUES('C', 13);
+  INSERT INTO t1(content, lang_id) VALUES('D', 13);
+  INSERT INTO t1(content, lang_id) VALUES('E', 13);
+  INSERT INTO t1(content, lang_id) VALUES('F', 13);
+  INSERT INTO t1(content, lang_id) VALUES('G', 13);
+  INSERT INTO t1(content, lang_id) VALUES('H', 13);
+  INSERT INTO t1(content, lang_id) VALUES('I', 13);
+  INSERT INTO t1(content, lang_id) VALUES('J', 13);
+  INSERT INTO t1(content, lang_id) VALUES('K', 13);
+  INSERT INTO t1(content, lang_id) VALUES('L', 13);
+  INSERT INTO t1(content, lang_id) VALUES('M', 13);
+  INSERT INTO t1(content, lang_id) VALUES('N', 13);
+  INSERT INTO t1(content, lang_id) VALUES('O', 13);
+  INSERT INTO t1(content, lang_id) VALUES('P', 13);
+  INSERT INTO t1(content, lang_id) VALUES('Q', 13);
+  INSERT INTO t1(content, lang_id) VALUES('R', 13);
+  INSERT INTO t1(content, lang_id) VALUES('S', 13);
+  SELECT rowid FROM t1 WHERE t1 MATCH 'A';
+} {}
+
+
+#-------------------------------------------------------------------------
+# Test cases 2.*
+#
+proc build_multilingual_db_1 {db} {
+  $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) }
+
+  set xwords [list zero one two three four five six seven eight nine ten]
+  set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa]
+
+  for {set i 0} {$i < 1000} {incr i} {
+    set iLangid [expr $i%9]
+    set x ""
+    set y ""
+
+    set x [list]
+    lappend x [lindex $xwords [expr ($i / 1000) % 10]]
+    lappend x [lindex $xwords [expr ($i / 100)  % 10]]
+    lappend x [lindex $xwords [expr ($i / 10)   % 10]]
+    lappend x [lindex $xwords [expr ($i / 1)   % 10]]
+
+    set y [list]
+    lappend y [lindex $ywords [expr ($i / 1000) % 10]]
+    lappend y [lindex $ywords [expr ($i / 100)  % 10]]
+    lappend y [lindex $ywords [expr ($i / 10)   % 10]]
+    lappend y [lindex $ywords [expr ($i / 1)   % 10]]
+
+    $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) }
+  }
+
+  $db eval {
+    CREATE TABLE data(x, y, l);
+    INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2;
+  }
+}
+
+proc rowid_list_set_langid {langid} {
+  set ::rowid_list_langid $langid
+}
+proc rowid_list {pattern} {
+  set langid $::rowid_list_langid
+  set res [list]
+  db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} {
+    if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} {
+      lappend res $rowid
+    }
+  }
+  return $res
+}
+
+proc or_merge_list {list1 list2} {
+  set res [list]
+
+  set i1 0
+  set i2 0
+
+  set n1 [llength $list1]
+  set n2 [llength $list2]
+
+  while {$i1 < $n1 && $i2 < $n2} {
+    set e1 [lindex $list1 $i1]
+    set e2 [lindex $list2 $i2]
+
+    if {$e1==$e2} {
+      lappend res $e1
+      incr i1
+      incr i2
+    } elseif {$e1 < $e2} {
+      lappend res $e1
+      incr i1
+    } else {
+      lappend res $e2
+      incr i2
+    }
+  }
+
+  concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end]
+}
+
+proc or_merge_lists {args} {
+  set res [lindex $args 0]
+  for {set i 1} {$i < [llength $args]} {incr i} {
+    set res [or_merge_list $res [lindex $args $i]]
+  }
+  set res
+}
+
+proc and_merge_list {list1 list2} {
+  foreach i $list2 { set a($i) 1 }
+  set res [list]
+  foreach i $list1 {
+    if {[info exists a($i)]} {lappend res $i}
+  }
+  set res
+}
+
+
+proc and_merge_lists {args} {
+  set res [lindex $args 0]
+  for {set i 1} {$i < [llength $args]} {incr i} {
+    set res [and_merge_list $res [lindex $args $i]]
+  }
+  set res
+}
+
+proc filter_list {list langid} {
+  set res [list]
+  foreach i $list {
+    if {($i % 9) == $langid} {lappend res $i}
+  }
+  set res
+}
+
+do_test 2.0 { 
+  reset_db
+  build_multilingual_db_1 db
+} {}
+
+proc do_test_query1 {tn query res_script} {
+  for {set langid 0} {$langid < 10} {incr langid} {
+    rowid_list_set_langid $langid
+    set res [eval $res_script]
+
+    set actual [
+      execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
+    ]
+    do_test $tn.$langid [list set {} $actual] $res
+  }
+}
+
+# Run some queries. 
+do_test_query1 2.1.1  {delta}          { rowid_list delta }
+do_test_query1 2.1.2  {"zero one two"} { rowid_list "zero one two" }
+do_test_query1 2.1.3  {zero one two} {
+  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
+}
+do_test_query1 2.1.4  {"zero one" OR "one two"} {
+  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
+}
+
+# Now try the same tests as above, but after running the 'optimize'
+# command on the FTS table.
+#
+do_execsql_test 2.2 {
+  INSERT INTO t2(t2) VALUES('optimize');
+  SELECT count(*) FROM t2_segdir;
+} {9}
+do_test_query1 2.2.1 {delta}          { rowid_list delta }
+do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" }
+do_test_query1 2.2.3 {zero one two} {
+  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
+}
+do_test_query1 2.2.4 {"zero one" OR "one two"} {
+  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
+}
+
+# And rebuild.
+#
+do_test 2.3 { 
+  reset_db
+  build_multilingual_db_1 db
+  execsql { INSERT INTO t2(t2) VALUES('rebuild') }
+} {}
+do_test_query1 2.3.1 {delta}          { rowid_list delta }
+do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" }
+do_test_query1 2.3.3 {zero one two} {
+  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
+}
+do_test_query1 2.3.4 {"zero one" OR "one two"} {
+  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
+}
+
+#-------------------------------------------------------------------------
+# Test cases 3.*
+#
+do_test 3.0 {
+  reset_db
+  build_multilingual_db_1 db
+  execsql {
+    CREATE TABLE t3_data(l, x, y);
+    INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2;
+    DROP TABLE t2;
+  }
+} {}
+do_execsql_test 3.1 {
+  CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l);
+  INSERT INTO t2(t2) VALUES('rebuild');
+}
+
+do_test_query1 3.1.1 {delta}          { rowid_list delta }
+do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" }
+do_test_query1 3.1.3 {zero one two} {
+  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
+}
+do_test_query1 3.1.4 {"zero one" OR "one two"} {
+  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
+}
+
+do_execsql_test 3.2.1 {
+  DROP TABLE t2;
+  CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable);
+}
+
+do_execsql_test 3.2.2 {
+  INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data;
+}
+
+do_execsql_test 3.2.3 {
+  DROP TABLE t3_data;
+}
+
+do_test_query1 3.3.1 {delta}          { rowid_list delta }
+do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" }
+do_test_query1 3.3.3 {zero one two} {
+  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
+}
+do_test_query1 3.3.4 {"zero one" OR "one two"} {
+  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
+}
+
+#-------------------------------------------------------------------------
+# Test cases 4.*
+#
+proc build_multilingual_db_2 {db} {
+  $db eval {
+    CREATE VIRTUAL TABLE t4 USING fts4(
+        tokenize=testtokenizer, 
+        languageid=lid
+    );
+  }
+  for {set i 0} {$i < 50} {incr i} {
+    execsql { 
+      INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i) 
+    }
+  }
+}
+
+do_test 4.1.0 {
+  reset_db
+  set ptr [fts3_test_tokenizer]
+  execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) }
+  build_multilingual_db_2 db
+} {}
+do_execsql_test 4.1.1 {
+  SELECT docid FROM t4 WHERE t4 MATCH 'quick';
+} {0}
+do_execsql_test 4.1.2 {
+  SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1;
+} {}
+do_execsql_test 4.1.3 {
+  SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1;
+} {1}
+for {set i 0} {$i < 50} {incr i} {
+  do_execsql_test 4.1.4.$i {
+    SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i;
+  } [expr 0==($i%2)]
+}
+do_catchsql_test 4.1.5 {
+  INSERT INTO t4(content, lid) VALUES('hello world', 101)
+} {1 {SQL logic error or missing database}}
+
+finish_test

Index: test/permutations.test
==================================================================
--- test/permutations.test
+++ test/permutations.test
@@ -182,12 +182,11 @@
   fts3sort.test
   fts3fault.test fts3malloc.test fts3matchinfo.test
   fts3aux1.test fts3comp1.test fts3auto.test
   fts4aa.test fts4content.test
   fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
-  fts3corrupt2.test
-  fts3first.test
+  fts3corrupt2.test fts3first.test fts4langid.test
 }
 
 
 lappend ::testsuitelist xxx
 #-------------------------------------------------------------------------