/ Artifact Content
Login

Artifact 65297bcce8d5acd5aadef42acbe739aef5a2ef5e74c7b73361ca19f3e21de657:


     1  /*
     2  ** 2016-05-28
     3  **
     4  ** The author disclaims copyright to this source code.  In place of
     5  ** a legal notice, here is a blessing:
     6  **
     7  **    May you do good and not evil.
     8  **    May you find forgiveness for yourself and forgive others.
     9  **    May you share freely, never taking more than you give.
    10  **
    11  ******************************************************************************
    12  **
    13  ** This file contains the implementation of an SQLite virtual table for
    14  ** reading CSV files.
    15  **
    16  ** Usage:
    17  **
    18  **    .load ./csv
    19  **    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
    20  **    SELECT * FROM csv;
    21  **
    22  ** The columns are named "c0", "c1", "c2", ... by default.  But the
    23  ** application can define its own CREATE TABLE statement as an additional
    24  ** parameter.  For example:
    25  **
    26  **    CREATE VIRTUAL TABLE temp.csv2 USING csv(
    27  **       filename = "../http.log",
    28  **       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
    29  **    );
    30  **
    31  ** Instead of specifying a file, the text of the CSV can be loaded using
    32  ** the data= parameter.
    33  **
    34  ** If the columns=N parameter is supplied, then the CSV file is assumed to have
    35  ** N columns.  If the columns parameter is omitted, the CSV file is opened
    36  ** as soon as the virtual table is constructed and the first row of the CSV
    37  ** is read in order to count the columns.
    38  **
    39  ** Some extra debugging features (used for testing virtual tables) are available
    40  ** if this module is compiled with -DSQLITE_TEST.
    41  */
    42  #include <sqlite3ext.h>
    43  SQLITE_EXTENSION_INIT1
    44  #include <string.h>
    45  #include <stdlib.h>
    46  #include <assert.h>
    47  #include <stdarg.h>
    48  #include <ctype.h>
    49  #include <stdio.h>
    50  
    51  #ifndef SQLITE_OMIT_VIRTUALTABLE
    52  
    53  /*
    54  ** A macro to hint to the compiler that a function should not be
    55  ** inlined.
    56  */
    57  #if defined(__GNUC__)
    58  #  define CSV_NOINLINE  __attribute__((noinline))
    59  #elif defined(_MSC_VER) && _MSC_VER>=1310
    60  #  define CSV_NOINLINE  __declspec(noinline)
    61  #else
    62  #  define CSV_NOINLINE
    63  #endif
    64  
    65  
    66  /* Max size of the error message in a CsvReader */
    67  #define CSV_MXERR 200
    68  
    69  /* Size of the CsvReader input buffer */
    70  #define CSV_INBUFSZ 1024
    71  
    72  /* A context object used when read a CSV file. */
    73  typedef struct CsvReader CsvReader;
    74  struct CsvReader {
    75    FILE *in;              /* Read the CSV text from this input stream */
    76    char *z;               /* Accumulated text for a field */
    77    int n;                 /* Number of bytes in z */
    78    int nAlloc;            /* Space allocated for z[] */
    79    int nLine;             /* Current line number */
    80    int bNotFirst;         /* True if prior text has been seen */
    81    int cTerm;             /* Character that terminated the most recent field */
    82    size_t iIn;            /* Next unread character in the input buffer */
    83    size_t nIn;            /* Number of characters in the input buffer */
    84    char *zIn;             /* The input buffer */
    85    char zErr[CSV_MXERR];  /* Error message */
    86  };
    87  
    88  /* Initialize a CsvReader object */
    89  static void csv_reader_init(CsvReader *p){
    90    p->in = 0;
    91    p->z = 0;
    92    p->n = 0;
    93    p->nAlloc = 0;
    94    p->nLine = 0;
    95    p->bNotFirst = 0;
    96    p->nIn = 0;
    97    p->zIn = 0;
    98    p->zErr[0] = 0;
    99  }
   100  
   101  /* Close and reset a CsvReader object */
   102  static void csv_reader_reset(CsvReader *p){
   103    if( p->in ){
   104      fclose(p->in);
   105      sqlite3_free(p->zIn);
   106    }
   107    sqlite3_free(p->z);
   108    csv_reader_init(p);
   109  }
   110  
   111  /* Report an error on a CsvReader */
   112  static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
   113    va_list ap;
   114    va_start(ap, zFormat);
   115    sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
   116    va_end(ap);
   117  }
   118  
   119  /* Open the file associated with a CsvReader
   120  ** Return the number of errors.
   121  */
   122  static int csv_reader_open(
   123    CsvReader *p,               /* The reader to open */
   124    const char *zFilename,      /* Read from this filename */
   125    const char *zData           /*  ... or use this data */
   126  ){
   127    if( zFilename ){
   128      p->zIn = sqlite3_malloc( CSV_INBUFSZ );
   129      if( p->zIn==0 ){
   130        csv_errmsg(p, "out of memory");
   131        return 1;
   132      }
   133      p->in = fopen(zFilename, "rb");
   134      if( p->in==0 ){
   135        sqlite3_free(p->zIn);
   136        csv_reader_reset(p);
   137        csv_errmsg(p, "cannot open '%s' for reading", zFilename);
   138        return 1;
   139      }
   140    }else{
   141      assert( p->in==0 );
   142      p->zIn = (char*)zData;
   143      p->nIn = strlen(zData);
   144    }
   145    return 0;
   146  }
   147  
   148  /* The input buffer has overflowed.  Refill the input buffer, then
   149  ** return the next character
   150  */
   151  static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
   152    size_t got;
   153  
   154    assert( p->iIn>=p->nIn );  /* Only called on an empty input buffer */
   155    assert( p->in!=0 );        /* Only called if reading froma file */
   156  
   157    got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
   158    if( got==0 ) return EOF;
   159    p->nIn = got;
   160    p->iIn = 1;
   161    return p->zIn[0];
   162  }
   163  
   164  /* Return the next character of input.  Return EOF at end of input. */
   165  static int csv_getc(CsvReader *p){
   166    if( p->iIn >= p->nIn ){
   167      if( p->in!=0 ) return csv_getc_refill(p);
   168      return EOF;
   169    }
   170    return ((unsigned char*)p->zIn)[p->iIn++];
   171  }
   172  
   173  /* Increase the size of p->z and append character c to the end. 
   174  ** Return 0 on success and non-zero if there is an OOM error */
   175  static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
   176    char *zNew;
   177    int nNew = p->nAlloc*2 + 100;
   178    zNew = sqlite3_realloc64(p->z, nNew);
   179    if( zNew ){
   180      p->z = zNew;
   181      p->nAlloc = nNew;
   182      p->z[p->n++] = c;
   183      return 0;
   184    }else{
   185      csv_errmsg(p, "out of memory");
   186      return 1;
   187    }
   188  }
   189  
   190  /* Append a single character to the CsvReader.z[] array.
   191  ** Return 0 on success and non-zero if there is an OOM error */
   192  static int csv_append(CsvReader *p, char c){
   193    if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
   194    p->z[p->n++] = c;
   195    return 0;
   196  }
   197  
   198  /* Read a single field of CSV text.  Compatible with rfc4180 and extended
   199  ** with the option of having a separator other than ",".
   200  **
   201  **   +  Input comes from p->in.
   202  **   +  Store results in p->z of length p->n.  Space to hold p->z comes
   203  **      from sqlite3_malloc64().
   204  **   +  Keep track of the line number in p->nLine.
   205  **   +  Store the character that terminates the field in p->cTerm.  Store
   206  **      EOF on end-of-file.
   207  **
   208  ** Return 0 at EOF or on OOM.  On EOF, the p->cTerm character will have
   209  ** been set to EOF.
   210  */
   211  static char *csv_read_one_field(CsvReader *p){
   212    int c;
   213    p->n = 0;
   214    c = csv_getc(p);
   215    if( c==EOF ){
   216      p->cTerm = EOF;
   217      return 0;
   218    }
   219    if( c=='"' ){
   220      int pc, ppc;
   221      int startLine = p->nLine;
   222      pc = ppc = 0;
   223      while( 1 ){
   224        c = csv_getc(p);
   225        if( c<='"' || pc=='"' ){
   226          if( c=='\n' ) p->nLine++;
   227          if( c=='"' ){
   228            if( pc=='"' ){
   229              pc = 0;
   230              continue;
   231            }
   232          }
   233          if( (c==',' && pc=='"')
   234           || (c=='\n' && pc=='"')
   235           || (c=='\n' && pc=='\r' && ppc=='"')
   236           || (c==EOF && pc=='"')
   237          ){
   238            do{ p->n--; }while( p->z[p->n]!='"' );
   239            p->cTerm = (char)c;
   240            break;
   241          }
   242          if( pc=='"' && c!='\r' ){
   243            csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
   244            break;
   245          }
   246          if( c==EOF ){
   247            csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
   248                       startLine, '"');
   249            p->cTerm = (char)c;
   250            break;
   251          }
   252        }
   253        if( csv_append(p, (char)c) ) return 0;
   254        ppc = pc;
   255        pc = c;
   256      }
   257    }else{
   258      /* If this is the first field being parsed and it begins with the
   259      ** UTF-8 BOM  (0xEF BB BF) then skip the BOM */
   260      if( (c&0xff)==0xef && p->bNotFirst==0 ){
   261        csv_append(p, (char)c);
   262        c = csv_getc(p);
   263        if( (c&0xff)==0xbb ){
   264          csv_append(p, (char)c);
   265          c = csv_getc(p);
   266          if( (c&0xff)==0xbf ){
   267            p->bNotFirst = 1;
   268            p->n = 0;
   269            return csv_read_one_field(p);
   270          }
   271        }
   272      }
   273      while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
   274        if( csv_append(p, (char)c) ) return 0;
   275        c = csv_getc(p);
   276      }
   277      if( c=='\n' ){
   278        p->nLine++;
   279        if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
   280      }
   281      p->cTerm = (char)c;
   282    }
   283    if( p->z ) p->z[p->n] = 0;
   284    p->bNotFirst = 1;
   285    return p->z;
   286  }
   287  
   288  
   289  /* Forward references to the various virtual table methods implemented
   290  ** in this file. */
   291  static int csvtabCreate(sqlite3*, void*, int, const char*const*, 
   292                             sqlite3_vtab**,char**);
   293  static int csvtabConnect(sqlite3*, void*, int, const char*const*, 
   294                             sqlite3_vtab**,char**);
   295  static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
   296  static int csvtabDisconnect(sqlite3_vtab*);
   297  static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
   298  static int csvtabClose(sqlite3_vtab_cursor*);
   299  static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
   300                            int argc, sqlite3_value **argv);
   301  static int csvtabNext(sqlite3_vtab_cursor*);
   302  static int csvtabEof(sqlite3_vtab_cursor*);
   303  static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
   304  static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
   305  
   306  /* An instance of the CSV virtual table */
   307  typedef struct CsvTable {
   308    sqlite3_vtab base;              /* Base class.  Must be first */
   309    char *zFilename;                /* Name of the CSV file */
   310    char *zData;                    /* Raw CSV data in lieu of zFilename */
   311    long iStart;                    /* Offset to start of data in zFilename */
   312    int nCol;                       /* Number of columns in the CSV file */
   313    unsigned int tstFlags;          /* Bit values used for testing */
   314  } CsvTable;
   315  
   316  /* Allowed values for tstFlags */
   317  #define CSVTEST_FIDX  0x0001      /* Pretend that constrained searchs cost less*/
   318  
   319  /* A cursor for the CSV virtual table */
   320  typedef struct CsvCursor {
   321    sqlite3_vtab_cursor base;       /* Base class.  Must be first */
   322    CsvReader rdr;                  /* The CsvReader object */
   323    char **azVal;                   /* Value of the current row */
   324    int *aLen;                      /* Length of each entry */
   325    sqlite3_int64 iRowid;           /* The current rowid.  Negative for EOF */
   326  } CsvCursor;
   327  
   328  /* Transfer error message text from a reader into a CsvTable */
   329  static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
   330    sqlite3_free(pTab->base.zErrMsg);
   331    pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
   332  }
   333  
   334  /*
   335  ** This method is the destructor fo a CsvTable object.
   336  */
   337  static int csvtabDisconnect(sqlite3_vtab *pVtab){
   338    CsvTable *p = (CsvTable*)pVtab;
   339    sqlite3_free(p->zFilename);
   340    sqlite3_free(p->zData);
   341    sqlite3_free(p);
   342    return SQLITE_OK;
   343  }
   344  
   345  /* Skip leading whitespace.  Return a pointer to the first non-whitespace
   346  ** character, or to the zero terminator if the string has only whitespace */
   347  static const char *csv_skip_whitespace(const char *z){
   348    while( isspace((unsigned char)z[0]) ) z++;
   349    return z;
   350  }
   351  
   352  /* Remove trailing whitespace from the end of string z[] */
   353  static void csv_trim_whitespace(char *z){
   354    size_t n = strlen(z);
   355    while( n>0 && isspace((unsigned char)z[n]) ) n--;
   356    z[n] = 0;
   357  }
   358  
   359  /* Dequote the string */
   360  static void csv_dequote(char *z){
   361    int j;
   362    char cQuote = z[0];
   363    size_t i, n;
   364  
   365    if( cQuote!='\'' && cQuote!='"' ) return;
   366    n = strlen(z);
   367    if( n<2 || z[n-1]!=z[0] ) return;
   368    for(i=1, j=0; i<n-1; i++){
   369      if( z[i]==cQuote && z[i+1]==cQuote ) i++;
   370      z[j++] = z[i];
   371    }
   372    z[j] = 0;
   373  }
   374  
   375  /* Check to see if the string is of the form:  "TAG = VALUE" with optional
   376  ** whitespace before and around tokens.  If it is, return a pointer to the
   377  ** first character of VALUE.  If it is not, return NULL.
   378  */
   379  static const char *csv_parameter(const char *zTag, int nTag, const char *z){
   380    z = csv_skip_whitespace(z);
   381    if( strncmp(zTag, z, nTag)!=0 ) return 0;
   382    z = csv_skip_whitespace(z+nTag);
   383    if( z[0]!='=' ) return 0;
   384    return csv_skip_whitespace(z+1);
   385  }
   386  
   387  /* Decode a parameter that requires a dequoted string.
   388  **
   389  ** Return 1 if the parameter is seen, or 0 if not.  1 is returned
   390  ** even if there is an error.  If an error occurs, then an error message
   391  ** is left in p->zErr.  If there are no errors, p->zErr[0]==0.
   392  */
   393  static int csv_string_parameter(
   394    CsvReader *p,            /* Leave the error message here, if there is one */
   395    const char *zParam,      /* Parameter we are checking for */
   396    const char *zArg,        /* Raw text of the virtual table argment */
   397    char **pzVal             /* Write the dequoted string value here */
   398  ){
   399    const char *zValue;
   400    zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
   401    if( zValue==0 ) return 0;
   402    p->zErr[0] = 0;
   403    if( *pzVal ){
   404      csv_errmsg(p, "more than one '%s' parameter", zParam);
   405      return 1;
   406    }
   407    *pzVal = sqlite3_mprintf("%s", zValue);
   408    if( *pzVal==0 ){
   409      csv_errmsg(p, "out of memory");
   410      return 1;
   411    }
   412    csv_trim_whitespace(*pzVal);
   413    csv_dequote(*pzVal);
   414    return 1;
   415  }
   416  
   417  
   418  /* Return 0 if the argument is false and 1 if it is true.  Return -1 if
   419  ** we cannot really tell.
   420  */
   421  static int csv_boolean(const char *z){
   422    if( sqlite3_stricmp("yes",z)==0
   423     || sqlite3_stricmp("on",z)==0
   424     || sqlite3_stricmp("true",z)==0
   425     || (z[0]=='1' && z[1]==0)
   426    ){
   427      return 1;
   428    }
   429    if( sqlite3_stricmp("no",z)==0
   430     || sqlite3_stricmp("off",z)==0
   431     || sqlite3_stricmp("false",z)==0
   432     || (z[0]=='0' && z[1]==0)
   433    ){
   434      return 0;
   435    }
   436    return -1;
   437  }
   438  
   439  
   440  /*
   441  ** Parameters:
   442  **    filename=FILENAME          Name of file containing CSV content
   443  **    data=TEXT                  Direct CSV content.
   444  **    schema=SCHEMA              Alternative CSV schema.
   445  **    header=YES|NO              First row of CSV defines the names of
   446  **                               columns if "yes".  Default "no".
   447  **    columns=N                  Assume the CSV file contains N columns.
   448  **
   449  ** Only available if compiled with SQLITE_TEST:
   450  **    
   451  **    testflags=N                Bitmask of test flags.  Optional
   452  **
   453  ** If schema= is omitted, then the columns are named "c0", "c1", "c2",
   454  ** and so forth.  If columns=N is omitted, then the file is opened and
   455  ** the number of columns in the first row is counted to determine the
   456  ** column count.  If header=YES, then the first row is skipped.
   457  */
   458  static int csvtabConnect(
   459    sqlite3 *db,
   460    void *pAux,
   461    int argc, const char *const*argv,
   462    sqlite3_vtab **ppVtab,
   463    char **pzErr
   464  ){
   465    CsvTable *pNew = 0;        /* The CsvTable object to construct */
   466    int bHeader = -1;          /* header= flags.  -1 means not seen yet */
   467    int rc = SQLITE_OK;        /* Result code from this routine */
   468    int i, j;                  /* Loop counters */
   469  #ifdef SQLITE_TEST
   470    int tstFlags = 0;          /* Value for testflags=N parameter */
   471  #endif
   472    int nCol = -99;            /* Value of the columns= parameter */
   473    CsvReader sRdr;            /* A CSV file reader used to store an error
   474                               ** message and/or to count the number of columns */
   475    static const char *azParam[] = {
   476       "filename", "data", "schema", 
   477    };
   478    char *azPValue[3];         /* Parameter values */
   479  # define CSV_FILENAME (azPValue[0])
   480  # define CSV_DATA     (azPValue[1])
   481  # define CSV_SCHEMA   (azPValue[2])
   482  
   483  
   484    assert( sizeof(azPValue)==sizeof(azParam) );
   485    memset(&sRdr, 0, sizeof(sRdr));
   486    memset(azPValue, 0, sizeof(azPValue));
   487    for(i=3; i<argc; i++){
   488      const char *z = argv[i];
   489      const char *zValue;
   490      for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
   491        if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
   492      }
   493      if( j<sizeof(azParam)/sizeof(azParam[0]) ){
   494        if( sRdr.zErr[0] ) goto csvtab_connect_error;
   495      }else
   496      if( (zValue = csv_parameter("header",6,z))!=0 ){
   497        int x;
   498        if( bHeader>=0 ){
   499          csv_errmsg(&sRdr, "more than one 'header' parameter");
   500          goto csvtab_connect_error;
   501        }
   502        x = csv_boolean(zValue);
   503        if( x==1 ){
   504          bHeader = 1;
   505        }else if( x==0 ){
   506          bHeader = 0;
   507        }else{
   508          csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
   509          goto csvtab_connect_error;
   510        }
   511      }else
   512  #ifdef SQLITE_TEST
   513      if( (zValue = csv_parameter("testflags",9,z))!=0 ){
   514        tstFlags = (unsigned int)atoi(zValue);
   515      }else
   516  #endif
   517      if( (zValue = csv_parameter("columns",7,z))!=0 ){
   518        if( nCol>0 ){
   519          csv_errmsg(&sRdr, "more than one 'columns' parameter");
   520          goto csvtab_connect_error;
   521        }
   522        nCol = atoi(zValue);
   523        if( nCol<=0 ){
   524          csv_errmsg(&sRdr, "must have at least one column");
   525          goto csvtab_connect_error;
   526        }
   527      }else
   528      {
   529        csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
   530        goto csvtab_connect_error;
   531      }
   532    }
   533    if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
   534      csv_errmsg(&sRdr, "must either filename= or data= but not both");
   535      goto csvtab_connect_error;
   536    }
   537    if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
   538      goto csvtab_connect_error;
   539    }
   540    pNew = sqlite3_malloc( sizeof(*pNew) );
   541    *ppVtab = (sqlite3_vtab*)pNew;
   542    if( pNew==0 ) goto csvtab_connect_oom;
   543    memset(pNew, 0, sizeof(*pNew));
   544    if( nCol>0 ){
   545      pNew->nCol = nCol;
   546    }else{
   547      do{
   548        csv_read_one_field(&sRdr);
   549        pNew->nCol++;
   550      }while( sRdr.cTerm==',' );
   551    }
   552    pNew->zFilename = CSV_FILENAME;  CSV_FILENAME = 0;
   553    pNew->zData = CSV_DATA;          CSV_DATA = 0;
   554  #ifdef SQLITE_TEST
   555    pNew->tstFlags = tstFlags;
   556  #endif
   557    pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
   558    csv_reader_reset(&sRdr);
   559    if( CSV_SCHEMA==0 ){
   560      char *zSep = "";
   561      CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
   562      if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
   563      for(i=0; i<pNew->nCol; i++){
   564        CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
   565        zSep = ",";
   566      }
   567      CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
   568    }
   569    rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
   570    if( rc ) goto csvtab_connect_error;
   571    for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
   572      sqlite3_free(azPValue[i]);
   573    }
   574    return SQLITE_OK;
   575  
   576  csvtab_connect_oom:
   577    rc = SQLITE_NOMEM;
   578    csv_errmsg(&sRdr, "out of memory");
   579  
   580  csvtab_connect_error:
   581    if( pNew ) csvtabDisconnect(&pNew->base);
   582    for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
   583      sqlite3_free(azPValue[i]);
   584    }
   585    if( sRdr.zErr[0] ){
   586      sqlite3_free(*pzErr);
   587      *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
   588    }
   589    csv_reader_reset(&sRdr);
   590    if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
   591    return rc;
   592  }
   593  
   594  /*
   595  ** Reset the current row content held by a CsvCursor.
   596  */
   597  static void csvtabCursorRowReset(CsvCursor *pCur){
   598    CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
   599    int i;
   600    for(i=0; i<pTab->nCol; i++){
   601      sqlite3_free(pCur->azVal[i]);
   602      pCur->azVal[i] = 0;
   603      pCur->aLen[i] = 0;
   604    }
   605  }
   606  
   607  /*
   608  ** The xConnect and xCreate methods do the same thing, but they must be
   609  ** different so that the virtual table is not an eponymous virtual table.
   610  */
   611  static int csvtabCreate(
   612    sqlite3 *db,
   613    void *pAux,
   614    int argc, const char *const*argv,
   615    sqlite3_vtab **ppVtab,
   616    char **pzErr
   617  ){
   618   return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
   619  }
   620  
   621  /*
   622  ** Destructor for a CsvCursor.
   623  */
   624  static int csvtabClose(sqlite3_vtab_cursor *cur){
   625    CsvCursor *pCur = (CsvCursor*)cur;
   626    csvtabCursorRowReset(pCur);
   627    csv_reader_reset(&pCur->rdr);
   628    sqlite3_free(cur);
   629    return SQLITE_OK;
   630  }
   631  
   632  /*
   633  ** Constructor for a new CsvTable cursor object.
   634  */
   635  static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
   636    CsvTable *pTab = (CsvTable*)p;
   637    CsvCursor *pCur;
   638    size_t nByte;
   639    nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
   640    pCur = sqlite3_malloc64( nByte );
   641    if( pCur==0 ) return SQLITE_NOMEM;
   642    memset(pCur, 0, nByte);
   643    pCur->azVal = (char**)&pCur[1];
   644    pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
   645    *ppCursor = &pCur->base;
   646    if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
   647      csv_xfer_error(pTab, &pCur->rdr);
   648      return SQLITE_ERROR;
   649    }
   650    return SQLITE_OK;
   651  }
   652  
   653  
   654  /*
   655  ** Advance a CsvCursor to its next row of input.
   656  ** Set the EOF marker if we reach the end of input.
   657  */
   658  static int csvtabNext(sqlite3_vtab_cursor *cur){
   659    CsvCursor *pCur = (CsvCursor*)cur;
   660    CsvTable *pTab = (CsvTable*)cur->pVtab;
   661    int i = 0;
   662    char *z;
   663    do{
   664      z = csv_read_one_field(&pCur->rdr);
   665      if( z==0 ){
   666        break;
   667      }
   668      if( i<pTab->nCol ){
   669        if( pCur->aLen[i] < pCur->rdr.n+1 ){
   670          char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
   671          if( zNew==0 ){
   672            csv_errmsg(&pCur->rdr, "out of memory");
   673            csv_xfer_error(pTab, &pCur->rdr);
   674            break;
   675          }
   676          pCur->azVal[i] = zNew;
   677          pCur->aLen[i] = pCur->rdr.n+1;
   678        }
   679        memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
   680        i++;
   681      }
   682    }while( pCur->rdr.cTerm==',' );
   683    if( z==0 || (pCur->rdr.cTerm==EOF && i<pTab->nCol) ){
   684      pCur->iRowid = -1;
   685    }else{
   686      pCur->iRowid++;
   687      while( i<pTab->nCol ){
   688        sqlite3_free(pCur->azVal[i]);
   689        pCur->azVal[i] = 0;
   690        pCur->aLen[i] = 0;
   691        i++;
   692      }
   693    }
   694    return SQLITE_OK;
   695  }
   696  
   697  /*
   698  ** Return values of columns for the row at which the CsvCursor
   699  ** is currently pointing.
   700  */
   701  static int csvtabColumn(
   702    sqlite3_vtab_cursor *cur,   /* The cursor */
   703    sqlite3_context *ctx,       /* First argument to sqlite3_result_...() */
   704    int i                       /* Which column to return */
   705  ){
   706    CsvCursor *pCur = (CsvCursor*)cur;
   707    CsvTable *pTab = (CsvTable*)cur->pVtab;
   708    if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
   709      sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC);
   710    }
   711    return SQLITE_OK;
   712  }
   713  
   714  /*
   715  ** Return the rowid for the current row.
   716  */
   717  static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
   718    CsvCursor *pCur = (CsvCursor*)cur;
   719    *pRowid = pCur->iRowid;
   720    return SQLITE_OK;
   721  }
   722  
   723  /*
   724  ** Return TRUE if the cursor has been moved off of the last
   725  ** row of output.
   726  */
   727  static int csvtabEof(sqlite3_vtab_cursor *cur){
   728    CsvCursor *pCur = (CsvCursor*)cur;
   729    return pCur->iRowid<0;
   730  }
   731  
   732  /*
   733  ** Only a full table scan is supported.  So xFilter simply rewinds to
   734  ** the beginning.
   735  */
   736  static int csvtabFilter(
   737    sqlite3_vtab_cursor *pVtabCursor, 
   738    int idxNum, const char *idxStr,
   739    int argc, sqlite3_value **argv
   740  ){
   741    CsvCursor *pCur = (CsvCursor*)pVtabCursor;
   742    CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
   743    pCur->iRowid = 0;
   744    if( pCur->rdr.in==0 ){
   745      assert( pCur->rdr.zIn==pTab->zData );
   746      assert( pTab->iStart>=0 );
   747      assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
   748      pCur->rdr.iIn = pTab->iStart;
   749    }else{
   750      fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
   751      pCur->rdr.iIn = 0;
   752      pCur->rdr.nIn = 0;
   753    }
   754    return csvtabNext(pVtabCursor);
   755  }
   756  
   757  /*
   758  ** Only a forward full table scan is supported.  xBestIndex is mostly
   759  ** a no-op.  If CSVTEST_FIDX is set, then the presence of equality
   760  ** constraints lowers the estimated cost, which is fiction, but is useful
   761  ** for testing certain kinds of virtual table behavior.
   762  */
   763  static int csvtabBestIndex(
   764    sqlite3_vtab *tab,
   765    sqlite3_index_info *pIdxInfo
   766  ){
   767    pIdxInfo->estimatedCost = 1000000;
   768  #ifdef SQLITE_TEST
   769    if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
   770      /* The usual (and sensible) case is to always do a full table scan.
   771      ** The code in this branch only runs when testflags=1.  This code
   772      ** generates an artifical and unrealistic plan which is useful
   773      ** for testing virtual table logic but is not helpful to real applications.
   774      **
   775      ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
   776      ** table (even though it is not) and the cost of running the virtual table
   777      ** is reduced from 1 million to just 10.  The constraints are *not* marked
   778      ** as omittable, however, so the query planner should still generate a
   779      ** plan that gives a correct answer, even if they plan is not optimal.
   780      */
   781      int i;
   782      int nConst = 0;
   783      for(i=0; i<pIdxInfo->nConstraint; i++){
   784        unsigned char op;
   785        if( pIdxInfo->aConstraint[i].usable==0 ) continue;
   786        op = pIdxInfo->aConstraint[i].op;
   787        if( op==SQLITE_INDEX_CONSTRAINT_EQ 
   788         || op==SQLITE_INDEX_CONSTRAINT_LIKE
   789         || op==SQLITE_INDEX_CONSTRAINT_GLOB
   790        ){
   791          pIdxInfo->estimatedCost = 10;
   792          pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
   793          nConst++;
   794        }
   795      }
   796    }
   797  #endif
   798    return SQLITE_OK;
   799  }
   800  
   801  
   802  static sqlite3_module CsvModule = {
   803    0,                       /* iVersion */
   804    csvtabCreate,            /* xCreate */
   805    csvtabConnect,           /* xConnect */
   806    csvtabBestIndex,         /* xBestIndex */
   807    csvtabDisconnect,        /* xDisconnect */
   808    csvtabDisconnect,        /* xDestroy */
   809    csvtabOpen,              /* xOpen - open a cursor */
   810    csvtabClose,             /* xClose - close a cursor */
   811    csvtabFilter,            /* xFilter - configure scan constraints */
   812    csvtabNext,              /* xNext - advance a cursor */
   813    csvtabEof,               /* xEof - check for end of scan */
   814    csvtabColumn,            /* xColumn - read data */
   815    csvtabRowid,             /* xRowid - read data */
   816    0,                       /* xUpdate */
   817    0,                       /* xBegin */
   818    0,                       /* xSync */
   819    0,                       /* xCommit */
   820    0,                       /* xRollback */
   821    0,                       /* xFindMethod */
   822    0,                       /* xRename */
   823  };
   824  
   825  #ifdef SQLITE_TEST
   826  /*
   827  ** For virtual table testing, make a version of the CSV virtual table
   828  ** available that has an xUpdate function.  But the xUpdate always returns
   829  ** SQLITE_READONLY since the CSV file is not really writable.
   830  */
   831  static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
   832    return SQLITE_READONLY;
   833  }
   834  static sqlite3_module CsvModuleFauxWrite = {
   835    0,                       /* iVersion */
   836    csvtabCreate,            /* xCreate */
   837    csvtabConnect,           /* xConnect */
   838    csvtabBestIndex,         /* xBestIndex */
   839    csvtabDisconnect,        /* xDisconnect */
   840    csvtabDisconnect,        /* xDestroy */
   841    csvtabOpen,              /* xOpen - open a cursor */
   842    csvtabClose,             /* xClose - close a cursor */
   843    csvtabFilter,            /* xFilter - configure scan constraints */
   844    csvtabNext,              /* xNext - advance a cursor */
   845    csvtabEof,               /* xEof - check for end of scan */
   846    csvtabColumn,            /* xColumn - read data */
   847    csvtabRowid,             /* xRowid - read data */
   848    csvtabUpdate,            /* xUpdate */
   849    0,                       /* xBegin */
   850    0,                       /* xSync */
   851    0,                       /* xCommit */
   852    0,                       /* xRollback */
   853    0,                       /* xFindMethod */
   854    0,                       /* xRename */
   855  };
   856  #endif /* SQLITE_TEST */
   857  
   858  #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
   859  
   860  
   861  #ifdef _WIN32
   862  __declspec(dllexport)
   863  #endif
   864  /* 
   865  ** This routine is called when the extension is loaded.  The new
   866  ** CSV virtual table module is registered with the calling database
   867  ** connection.
   868  */
   869  int sqlite3_csv_init(
   870    sqlite3 *db, 
   871    char **pzErrMsg, 
   872    const sqlite3_api_routines *pApi
   873  ){
   874  #ifndef SQLITE_OMIT_VIRTUALTABLE	
   875    int rc;
   876    SQLITE_EXTENSION_INIT2(pApi);
   877    rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
   878  #ifdef SQLITE_TEST
   879    if( rc==SQLITE_OK ){
   880      rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
   881    }
   882  #endif
   883    return rc;
   884  #else
   885    return SQLITE_OK;
   886  #endif
   887  }