/ Check-in [4a585189]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the sqlite3.pDfltColl (the default collating sequence for the database connection) so that it is the collating sequence appropriate for the database encoding, not the UTF8 collating sequence. This helps to ensure that the database encoding collation is always used, even for expressions that do not have an defined collating sequence. Ticket [1b8d7264567eb6fc].
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 4a5851893c3d71cc823b6ab5df5e58a852cd322fff26290f1ea05b63d67f564a
User & Date: drh 2020-03-05 16:13:24
Context
2020-03-05
18:04
Report an error if the main, or any other, database encoding is modified by an external process (perhaps using the backup API) after the db has been opened. (check-in: 895bd20b user: dan tags: trunk)
16:30
Merge trunk enhancements into the reuse-schema branch. (Actual check-in date is 2020-05-18) (check-in: 9d068e1e user: drh tags: reuse-schema)
16:13
Change the sqlite3.pDfltColl (the default collating sequence for the database connection) so that it is the collating sequence appropriate for the database encoding, not the UTF8 collating sequence. This helps to ensure that the database encoding collation is always used, even for expressions that do not have an defined collating sequence. Ticket [1b8d7264567eb6fc]. (check-in: 4a585189 user: drh tags: trunk)
14:19
When printing the OP_CollSeq opcode for EXPLAIN listings, include the text encoding with the name of the collating sequence. (check-in: eb5c1b77 user: drh tags: trunk)
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/callback.c.

159
160
161
162
163
164
165


166
167

168
169
170
171
172
173
174
175













176
177
178
179
180
181
182
CollSeq *sqlite3FindCollSeq(
  sqlite3 *db,          /* Database connection to search */
  u8 enc,               /* Desired text encoding */
  const char *zName,    /* Name of the collating sequence.  Might be NULL */
  int create            /* True to create CollSeq if doesn't already exist */
){
  CollSeq *pColl;


  if( zName ){
    pColl = findCollSeqEntry(db, zName, create);

  }else{
    pColl = db->pDfltColl;
  }
  assert( SQLITE_UTF8==1 && SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 );
  assert( enc>=SQLITE_UTF8 && enc<=SQLITE_UTF16BE );
  if( pColl ) pColl += enc-1;
  return pColl;
}














/*
** This function is responsible for invoking the collation factory callback
** or substituting a collation sequence of a different encoding when the
** requested collation sequence is not available in the desired encoding.
** 
** If it is not NULL, then pColl must point to the database native encoding 







>
>


>



<
<
<


>
>
>
>
>
>
>
>
>
>
>
>
>







159
160
161
162
163
164
165
166
167
168
169
170
171
172
173



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
CollSeq *sqlite3FindCollSeq(
  sqlite3 *db,          /* Database connection to search */
  u8 enc,               /* Desired text encoding */
  const char *zName,    /* Name of the collating sequence.  Might be NULL */
  int create            /* True to create CollSeq if doesn't already exist */
){
  CollSeq *pColl;
  assert( SQLITE_UTF8==1 && SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 );
  assert( enc>=SQLITE_UTF8 && enc<=SQLITE_UTF16BE );
  if( zName ){
    pColl = findCollSeqEntry(db, zName, create);
    if( pColl ) pColl += enc-1;
  }else{
    pColl = db->pDfltColl;
  }



  return pColl;
}

/*
** Change the text encoding for a database connection. This means that
** the pDfltColl must change as well.
*/
void sqlite3SetTextEncoding(sqlite3 *db, u8 enc){
  assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE );
  db->enc = enc;
  /* EVIDENCE-OF: R-08308-17224 The default collating function for all
  ** strings is BINARY. 
  */
  db->pDfltColl = sqlite3FindCollSeq(db, enc, sqlite3StrBINARY, 0);
}

/*
** This function is responsible for invoking the collation factory callback
** or substituting a collation sequence of a different encoding when the
** requested collation sequence is not available in the desired encoding.
** 
** If it is not NULL, then pColl must point to the database native encoding 

Changes to src/main.c.

3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
....
3244
3245
3246
3247
3248
3249
3250
3251


3252
3253
3254
3255
3256
3257
3258
  createCollation(db, sqlite3StrBINARY, SQLITE_UTF16BE, 0, binCollFunc, 0);
  createCollation(db, sqlite3StrBINARY, SQLITE_UTF16LE, 0, binCollFunc, 0);
  createCollation(db, "NOCASE", SQLITE_UTF8, 0, nocaseCollatingFunc, 0);
  createCollation(db, "RTRIM", SQLITE_UTF8, 0, rtrimCollFunc, 0);
  if( db->mallocFailed ){
    goto opendb_out;
  }
  /* EVIDENCE-OF: R-08308-17224 The default collating function for all
  ** strings is BINARY. 
  */
  db->pDfltColl = sqlite3FindCollSeq(db, SQLITE_UTF8, sqlite3StrBINARY, 0);
  assert( db->pDfltColl!=0 );

  /* Parse the filename/URI argument
  **
  ** Only allow sensible combinations of bits in the flags argument.  
  ** Throw an error if any non-sense combination is used.  If we
  ** do not block illegal combinations here, it could trigger
  ** assert() statements in deeper layers.  Sensible combinations
................................................................................
      rc = SQLITE_NOMEM_BKPT;
    }
    sqlite3Error(db, rc);
    goto opendb_out;
  }
  sqlite3BtreeEnter(db->aDb[0].pBt);
  db->aDb[0].pSchema = sqlite3SchemaGet(db, db->aDb[0].pBt);
  if( !db->mallocFailed ) ENC(db) = SCHEMA_ENC(db);


  sqlite3BtreeLeave(db->aDb[0].pBt);
  db->aDb[1].pSchema = sqlite3SchemaGet(db, 0);

  /* The default safety_level for the main database is FULL; for the temp
  ** database it is OFF. This matches the pager layer defaults.  
  */
  db->aDb[0].zDbSName = "main";







<
<
<
<
<







 







|
>
>







3195
3196
3197
3198
3199
3200
3201





3202
3203
3204
3205
3206
3207
3208
....
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
  createCollation(db, sqlite3StrBINARY, SQLITE_UTF16BE, 0, binCollFunc, 0);
  createCollation(db, sqlite3StrBINARY, SQLITE_UTF16LE, 0, binCollFunc, 0);
  createCollation(db, "NOCASE", SQLITE_UTF8, 0, nocaseCollatingFunc, 0);
  createCollation(db, "RTRIM", SQLITE_UTF8, 0, rtrimCollFunc, 0);
  if( db->mallocFailed ){
    goto opendb_out;
  }






  /* Parse the filename/URI argument
  **
  ** Only allow sensible combinations of bits in the flags argument.  
  ** Throw an error if any non-sense combination is used.  If we
  ** do not block illegal combinations here, it could trigger
  ** assert() statements in deeper layers.  Sensible combinations
................................................................................
      rc = SQLITE_NOMEM_BKPT;
    }
    sqlite3Error(db, rc);
    goto opendb_out;
  }
  sqlite3BtreeEnter(db->aDb[0].pBt);
  db->aDb[0].pSchema = sqlite3SchemaGet(db, db->aDb[0].pBt);
  if( !db->mallocFailed ){
    sqlite3SetTextEncoding(db, SCHEMA_ENC(db));
  }
  sqlite3BtreeLeave(db->aDb[0].pBt);
  db->aDb[1].pSchema = sqlite3SchemaGet(db, 0);

  /* The default safety_level for the main database is FULL; for the temp
  ** database it is OFF. This matches the pager layer defaults.  
  */
  db->aDb[0].zDbSName = "main";

Changes to src/pragma.c.

1833
1834
1835
1836
1837
1838
1839
1840
1841


1842
1843
1844
1845
1846
1847
1848
        ){
          canChangeEnc = 0;
        }
      }
      if( canChangeEnc ){
        for(pEnc=&encnames[0]; pEnc->zName; pEnc++){
          if( 0==sqlite3StrICmp(zRight, pEnc->zName) ){
            SCHEMA_ENC(db) = ENC(db) =
                pEnc->enc ? pEnc->enc : SQLITE_UTF16NATIVE;


            break;
          }
        }
        if( !pEnc->zName ){
          sqlite3ErrorMsg(pParse, "unsupported encoding: %s", zRight);
        }
      }







<
|
>
>







1833
1834
1835
1836
1837
1838
1839

1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
        ){
          canChangeEnc = 0;
        }
      }
      if( canChangeEnc ){
        for(pEnc=&encnames[0]; pEnc->zName; pEnc++){
          if( 0==sqlite3StrICmp(zRight, pEnc->zName) ){

            u8 enc = pEnc->enc ? pEnc->enc : SQLITE_UTF16NATIVE;
            SCHEMA_ENC(db) = enc;
            sqlite3SetTextEncoding(db, enc);
            break;
          }
        }
        if( !pEnc->zName ){
          sqlite3ErrorMsg(pParse, "unsupported encoding: %s", zRight);
        }
      }

Changes to src/prepare.c.

263
264
265
266
267
268
269
270
271

272
273
274
275
276
277
278

279
280
281
282
283
284
285
  /* If opening a non-empty database, check the text encoding. For the
  ** main database, set sqlite3.enc to the encoding of the main database.
  ** For an attached db, it is an error if the encoding is not the same
  ** as sqlite3.enc.
  */
  if( meta[BTREE_TEXT_ENCODING-1] ){  /* text encoding */
    if( iDb==0 ){
#ifndef SQLITE_OMIT_UTF16
      u8 encoding;

      /* If opening the main database, set ENC(db). */
      encoding = (u8)meta[BTREE_TEXT_ENCODING-1] & 3;
      if( encoding==0 ) encoding = SQLITE_UTF8;
      ENC(db) = encoding;
#else
      ENC(db) = SQLITE_UTF8;
#endif

    }else{
      /* If opening an attached database, the encoding much match ENC(db) */
      if( meta[BTREE_TEXT_ENCODING-1]!=ENC(db) ){
        sqlite3SetString(pzErrMsg, db, "attached databases must use the same"
            " text encoding as main database");
        rc = SQLITE_ERROR;
        goto initone_error_out;







<

>



<

|

>







263
264
265
266
267
268
269

270
271
272
273
274

275
276
277
278
279
280
281
282
283
284
285
  /* If opening a non-empty database, check the text encoding. For the
  ** main database, set sqlite3.enc to the encoding of the main database.
  ** For an attached db, it is an error if the encoding is not the same
  ** as sqlite3.enc.
  */
  if( meta[BTREE_TEXT_ENCODING-1] ){  /* text encoding */
    if( iDb==0 ){

      u8 encoding;
#ifndef SQLITE_OMIT_UTF16
      /* If opening the main database, set ENC(db). */
      encoding = (u8)meta[BTREE_TEXT_ENCODING-1] & 3;
      if( encoding==0 ) encoding = SQLITE_UTF8;

#else
      encoding = SQLITE_UTF8;
#endif
      sqlite3SetTextEncoding(db, encoding);
    }else{
      /* If opening an attached database, the encoding much match ENC(db) */
      if( meta[BTREE_TEXT_ENCODING-1]!=ENC(db) ){
        sqlite3SetString(pzErrMsg, db, "attached databases must use the same"
            " text encoding as main database");
        rc = SQLITE_ERROR;
        goto initone_error_out;

Changes to src/sqliteInt.h.

1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
....
4461
4462
4463
4464
4465
4466
4467

4468
4469
4470
4471
4472
4473
4474

/*
** Each database connection is an instance of the following structure.
*/
struct sqlite3 {
  sqlite3_vfs *pVfs;            /* OS Interface */
  struct Vdbe *pVdbe;           /* List of active virtual machines */
  CollSeq *pDfltColl;           /* The default collating sequence (BINARY) */
  sqlite3_mutex *mutex;         /* Connection mutex */
  Db *aDb;                      /* All backends */
  int nDb;                      /* Number of backends currently in use */
  u32 mDbFlags;                 /* flags recording internal state */
  u64 flags;                    /* flags settable by pragmas. See below */
  i64 lastRowid;                /* ROWID of most recent insert (see above) */
  i64 szMmap;                   /* Default mmap_size setting */
................................................................................
#endif

const char *sqlite3ErrStr(int);
int sqlite3ReadSchema(Parse *pParse);
CollSeq *sqlite3FindCollSeq(sqlite3*,u8 enc, const char*,int);
int sqlite3IsBinary(const CollSeq*);
CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char*zName);

CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
CollSeq *sqlite3ExprNNCollSeq(Parse *pParse, Expr *pExpr);
int sqlite3ExprCollSeqMatch(Parse*,Expr*,Expr*);
Expr *sqlite3ExprAddCollateToken(Parse *pParse, Expr*, const Token*, int);
Expr *sqlite3ExprAddCollateString(Parse*,Expr*,const char*);
Expr *sqlite3ExprSkipCollate(Expr*);
Expr *sqlite3ExprSkipCollateAndLikely(Expr*);







|







 







>







1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
....
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475

/*
** Each database connection is an instance of the following structure.
*/
struct sqlite3 {
  sqlite3_vfs *pVfs;            /* OS Interface */
  struct Vdbe *pVdbe;           /* List of active virtual machines */
  CollSeq *pDfltColl;           /* BINARY collseq for the database encoding */
  sqlite3_mutex *mutex;         /* Connection mutex */
  Db *aDb;                      /* All backends */
  int nDb;                      /* Number of backends currently in use */
  u32 mDbFlags;                 /* flags recording internal state */
  u64 flags;                    /* flags settable by pragmas. See below */
  i64 lastRowid;                /* ROWID of most recent insert (see above) */
  i64 szMmap;                   /* Default mmap_size setting */
................................................................................
#endif

const char *sqlite3ErrStr(int);
int sqlite3ReadSchema(Parse *pParse);
CollSeq *sqlite3FindCollSeq(sqlite3*,u8 enc, const char*,int);
int sqlite3IsBinary(const CollSeq*);
CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char*zName);
void sqlite3SetTextEncoding(sqlite3 *db, u8);
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
CollSeq *sqlite3ExprNNCollSeq(Parse *pParse, Expr *pExpr);
int sqlite3ExprCollSeqMatch(Parse*,Expr*,Expr*);
Expr *sqlite3ExprAddCollateToken(Parse *pParse, Expr*, const Token*, int);
Expr *sqlite3ExprAddCollateString(Parse*,Expr*,const char*);
Expr *sqlite3ExprSkipCollate(Expr*);
Expr *sqlite3ExprSkipCollateAndLikely(Expr*);