/ Check-in [032b3daa]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Optimizations to the tokenizer. (CVS 815)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:032b3daa1d3cf3e00a4a6ba0b09624f1aba6445c
User & Date: drh 2003-01-07 01:44:38
Context
2003-01-07
02:47
More optimizations. (CVS 816) check-in: a362981b user: drh tags: trunk
01:44
Optimizations to the tokenizer. (CVS 815) check-in: 032b3daa user: drh tags: trunk
2003-01-06
23:54
Remove unnecessary code from the VDBE. (CVS 814) check-in: b96ec281 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/tokenize.c.

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
...
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
...
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
...
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422

423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441





442
443
444
445
446
447
448

449
450
451
452
453
454
455
456
457
458
459

460
461
462

463
464
465
466


467
468
469
470
471
472
473
474
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.51 2002/10/27 19:35:35 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*
................................................................................
** -1 if the token is (or might be) incomplete.  Store the token
** type in *tokenType before returning.
*/
static int sqliteGetToken(const unsigned char *z, int *tokenType){
  int i;
  switch( *z ){
    case ' ': case '\t': case '\n': case '\f': case '\r': {
      for(i=1; z[i] && isspace(z[i]); i++){}
      *tokenType = TK_SPACE;
      return i;
    }
    case '-': {
      if( z[1]==0 ) return -1;
      if( z[1]=='-' ){
        for(i=2; z[i] && z[i]!='\n'; i++){}
................................................................................
        return 1;
      }
      /* Fall thru into the next case */
    }
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9': {
      *tokenType = TK_INTEGER;
      for(i=1; z[i] && isdigit(z[i]); i++){}
      if( z[i]=='.' ){
        i++;
        while( z[i] && isdigit(z[i]) ){ i++; }
        *tokenType = TK_FLOAT;
      }
      if( (z[i]=='e' || z[i]=='E') &&
           ( isdigit(z[i+1]) 
            || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
           )
      ){
        i += 2;
        while( z[i] && isdigit(z[i]) ){ i++; }
        *tokenType = TK_FLOAT;
      }else if( z[0]=='.' ){
        *tokenType = TK_FLOAT;
      }
      return i;
    }
    case '[': {
................................................................................
** memory obtained from malloc() and *pzErrMsg made to point to that
** error message.  Or maybe not.
*/
int sqliteRunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
  int nErr = 0;
  int i;
  void *pEngine;
  int once = 1;
  sqlite *db = pParse->db;
  extern void *sqliteParserAlloc(void*(*)(int));
  extern void sqliteParserFree(void*, void(*)(void*));
  extern int sqliteParser(void*, int, Token, Parse*);

  db->flags &= ~SQLITE_Interrupt;
  pParse->rc = SQLITE_OK;
  i = 0;
  pEngine = sqliteParserAlloc((void*(*)(int))malloc);
  if( pEngine==0 ){
    sqliteSetString(pzErrMsg, "out of memory", 0);
    return 1;
  }

  while( sqlite_malloc_failed==0 && nErr==0 && i>=0 && zSql[i]!=0 ){
    int tokenType;
    
    if( (db->flags & SQLITE_Interrupt)!=0 ){
      pParse->rc = SQLITE_INTERRUPT;
      sqliteSetString(pzErrMsg, "interrupt", 0);
      break;
    }
    pParse->sLastToken.z = &zSql[i];
    pParse->sLastToken.dyn = 0;
    pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType);
    i += pParse->sLastToken.n;
    if( once ){
      pParse->sFirstToken = pParse->sLastToken;
      once = 0;
    }
    switch( tokenType ){
      case TK_SPACE:
      case TK_COMMENT: {





        break;
      }
      case TK_ILLEGAL:
        sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1, 
           pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0);
        nErr++;
        break;

      default:
        sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse);
        if( pParse->zErrMsg && pParse->sErrToken.z ){
          sqliteSetNString(pzErrMsg, "near \"", -1, 
             pParse->sErrToken.z, pParse->sErrToken.n,
             "\": ", -1,
             pParse->zErrMsg, -1,
             0);
          nErr++;
          sqliteFree(pParse->zErrMsg);
          pParse->zErrMsg = 0;

        }else if( pParse->rc!=SQLITE_OK ){
          sqliteSetString(pzErrMsg, sqlite_error_string(pParse->rc), 0);
          nErr++;

        }
        break;
    }
  }


  if( zSql[i]==0 ){
    sqliteParser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
    sqliteParser(pEngine, 0, pParse->sLastToken, pParse);
    if( pParse->zErrMsg && pParse->sErrToken.z ){
       sqliteSetNString(pzErrMsg, "near \"", -1, 
          pParse->sErrToken.z, pParse->sErrToken.n,
          "\": ", -1,
          pParse->zErrMsg, -1,







|







 







|







 







|


|








|







 







<













>
|


<
|
<
<
<

|


<
<
<
<



>
>
>
>
>


|



|
>
|










>



>


|
|
>
>
|







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
...
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
...
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
...
402
403
404
405
406
407
408

409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425

426



427
428
429
430




431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.52 2003/01/07 01:44:38 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*
................................................................................
** -1 if the token is (or might be) incomplete.  Store the token
** type in *tokenType before returning.
*/
static int sqliteGetToken(const unsigned char *z, int *tokenType){
  int i;
  switch( *z ){
    case ' ': case '\t': case '\n': case '\f': case '\r': {
      for(i=1; isspace(z[i]); i++){}
      *tokenType = TK_SPACE;
      return i;
    }
    case '-': {
      if( z[1]==0 ) return -1;
      if( z[1]=='-' ){
        for(i=2; z[i] && z[i]!='\n'; i++){}
................................................................................
        return 1;
      }
      /* Fall thru into the next case */
    }
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9': {
      *tokenType = TK_INTEGER;
      for(i=1; isdigit(z[i]); i++){}
      if( z[i]=='.' ){
        i++;
        while( isdigit(z[i]) ){ i++; }
        *tokenType = TK_FLOAT;
      }
      if( (z[i]=='e' || z[i]=='E') &&
           ( isdigit(z[i+1]) 
            || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
           )
      ){
        i += 2;
        while( isdigit(z[i]) ){ i++; }
        *tokenType = TK_FLOAT;
      }else if( z[0]=='.' ){
        *tokenType = TK_FLOAT;
      }
      return i;
    }
    case '[': {
................................................................................
** memory obtained from malloc() and *pzErrMsg made to point to that
** error message.  Or maybe not.
*/
int sqliteRunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
  int nErr = 0;
  int i;
  void *pEngine;

  sqlite *db = pParse->db;
  extern void *sqliteParserAlloc(void*(*)(int));
  extern void sqliteParserFree(void*, void(*)(void*));
  extern int sqliteParser(void*, int, Token, Parse*);

  db->flags &= ~SQLITE_Interrupt;
  pParse->rc = SQLITE_OK;
  i = 0;
  pEngine = sqliteParserAlloc((void*(*)(int))malloc);
  if( pEngine==0 ){
    sqliteSetString(pzErrMsg, "out of memory", 0);
    return 1;
  }
  pParse->sLastToken.dyn = 0;
  while( sqlite_malloc_failed==0 && zSql[i]!=0 ){
    int tokenType;
    

    assert( i>=0 );



    pParse->sLastToken.z = &zSql[i];
    assert( pParse->sLastToken.dyn==0 );
    pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType);
    i += pParse->sLastToken.n;




    switch( tokenType ){
      case TK_SPACE:
      case TK_COMMENT: {
        if( (db->flags & SQLITE_Interrupt)!=0 ){
          pParse->rc = SQLITE_INTERRUPT;
          sqliteSetString(pzErrMsg, "interrupt", 0);
          goto abort_parse;
        }
        break;
      }
      case TK_ILLEGAL: {
        sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1, 
           pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0);
        nErr++;
        goto abort_parse;
      }
      default: {
        sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse);
        if( pParse->zErrMsg && pParse->sErrToken.z ){
          sqliteSetNString(pzErrMsg, "near \"", -1, 
             pParse->sErrToken.z, pParse->sErrToken.n,
             "\": ", -1,
             pParse->zErrMsg, -1,
             0);
          nErr++;
          sqliteFree(pParse->zErrMsg);
          pParse->zErrMsg = 0;
          goto abort_parse;
        }else if( pParse->rc!=SQLITE_OK ){
          sqliteSetString(pzErrMsg, sqlite_error_string(pParse->rc), 0);
          nErr++;
          goto abort_parse;
        }
        break;
      }
    }
  }
abort_parse:
  if( zSql[i]==0 && nErr==0 ){
    sqliteParser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
    sqliteParser(pEngine, 0, pParse->sLastToken, pParse);
    if( pParse->zErrMsg && pParse->sErrToken.z ){
       sqliteSetNString(pzErrMsg, "near \"", -1, 
          pParse->sErrToken.z, pParse->sErrToken.n,
          "\": ", -1,
          pParse->zErrMsg, -1,