SQLite

Check-in [0406ecbbe7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improvements to the CSV virtual table.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 0406ecbbe75513e342040b71fdd342462222dbb3820486b5f745d7865805c00b
User & Date: drh 2018-11-16 01:42:26.183
Context
2018-11-16
08:36
Fix a problem with virtual table "fsdir" and some join queries. (check-in: 7fffcee0fc user: dan tags: trunk)
01:42
Improvements to the CSV virtual table. (check-in: 0406ecbbe7 user: drh tags: trunk)
2018-11-15
19:12
Fix typos in the previous check-in. (check-in: 9b37bbf5f3 user: mistachkin tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/misc/csv.c.
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
**
** Usage:
**
**    .load ./csv
**    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
**    SELECT * FROM csv;
**
** The columns are named "c1", "c2", "c3", ... by default.  But the
** application can define its own CREATE TABLE statement as an additional
** parameter.  For example:
**
**    CREATE VIRTUAL TABLE temp.csv2 USING csv(
**       filename = "../http.log",
**       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
**    );
**
** Instead of specifying a file, the text of the CSV can be loaded using
** the data= parameter.
**
** If the columns=N parameter is supplied, then the CSV file is assumed to have
** N columns.  If the columns parameter is omitted, the CSV file is opened
** as soon as the virtual table is constructed and the first row of the CSV
** is read in order to count the tables.
**
** Some extra debugging features (used for testing virtual tables) are available
** if this module is compiled with -DSQLITE_TEST.
*/
#include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1
#include <string.h>







|
|
|










|
|
|







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
**
** Usage:
**
**    .load ./csv
**    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
**    SELECT * FROM csv;
**
** The columns are named "c1", "c2", "c3", ... by default.  Or the
** application can define its own CREATE TABLE statement using the
** schema= parameter, like this:
**
**    CREATE VIRTUAL TABLE temp.csv2 USING csv(
**       filename = "../http.log",
**       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
**    );
**
** Instead of specifying a file, the text of the CSV can be loaded using
** the data= parameter.
**
** If the columns=N parameter is supplied, then the CSV file is assumed to have
** N columns.  If both the columns= and schema= parameters are omitted, then
** the number and names of the columns is determined by the first line of
** the CSV input.
**
** Some extra debugging features (used for testing virtual tables) are available
** if this module is compiled with -DSQLITE_TEST.
*/
#include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1
#include <string.h>
432
433
434
435
436
437
438




























439
440
441
442
443
444
445
   || (z[0]=='0' && z[1]==0)
  ){
    return 0;
  }
  return -1;
}






























/*
** Parameters:
**    filename=FILENAME          Name of file containing CSV content
**    data=TEXT                  Direct CSV content.
**    schema=SCHEMA              Alternative CSV schema.
**    header=YES|NO              First row of CSV defines the names of







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
   || (z[0]=='0' && z[1]==0)
  ){
    return 0;
  }
  return -1;
}

/* Check to see if the string is of the form:  "TAG = BOOLEAN" or just "TAG".
** If it is, set *pValue to be the value of the boolean ("true" if there is
** not "= BOOLEAN" component) and return non-zero.  If the input string
** does not begin with TAG, return zero.
*/
static int csv_boolean_parameter(
  const char *zTag,       /* Tag we are looking for */
  int nTag,               /* Size of the tag in bytes */
  const char *z,          /* Input parameter */
  int *pValue             /* Write boolean value here */
){
  int b;
  z = csv_skip_whitespace(z);
  if( strncmp(zTag, z, nTag)!=0 ) return 0;
  z = csv_skip_whitespace(z + nTag);
  if( z[0]==0 ){
    *pValue = 1;
    return 1;
  }
  if( z[0]!='=' ) return 0;
  z = csv_skip_whitespace(z+1);
  b = csv_boolean(z);
  if( b>=0 ){
    *pValue = b;
    return 1;
  }
  return 0;
}

/*
** Parameters:
**    filename=FILENAME          Name of file containing CSV content
**    data=TEXT                  Direct CSV content.
**    schema=SCHEMA              Alternative CSV schema.
**    header=YES|NO              First row of CSV defines the names of
465
466
467
468
469
470
471

472
473
474
475
476
477
478
  CsvTable *pNew = 0;        /* The CsvTable object to construct */
  int bHeader = -1;          /* header= flags.  -1 means not seen yet */
  int rc = SQLITE_OK;        /* Result code from this routine */
  int i, j;                  /* Loop counters */
#ifdef SQLITE_TEST
  int tstFlags = 0;          /* Value for testflags=N parameter */
#endif

  int nCol = -99;            /* Value of the columns= parameter */
  CsvReader sRdr;            /* A CSV file reader used to store an error
                             ** message and/or to count the number of columns */
  static const char *azParam[] = {
     "filename", "data", "schema", 
  };
  char *azPValue[3];         /* Parameter values */







>







493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
  CsvTable *pNew = 0;        /* The CsvTable object to construct */
  int bHeader = -1;          /* header= flags.  -1 means not seen yet */
  int rc = SQLITE_OK;        /* Result code from this routine */
  int i, j;                  /* Loop counters */
#ifdef SQLITE_TEST
  int tstFlags = 0;          /* Value for testflags=N parameter */
#endif
  int b;                     /* Value of a boolean parameter */
  int nCol = -99;            /* Value of the columns= parameter */
  CsvReader sRdr;            /* A CSV file reader used to store an error
                             ** message and/or to count the number of columns */
  static const char *azParam[] = {
     "filename", "data", "schema", 
  };
  char *azPValue[3];         /* Parameter values */
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536


537

538
539
540
541
542
543


























544
545
546











547
548
549
550


551
552
553
554
555
556

557
558
559
560
561
562
563
564
565
566
567

568

569


570

571
572
573
574
575
576
577
    const char *zValue;
    for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
      if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
    }
    if( j<sizeof(azParam)/sizeof(azParam[0]) ){
      if( sRdr.zErr[0] ) goto csvtab_connect_error;
    }else
    if( (zValue = csv_parameter("header",6,z))!=0 ){
      int x;
      if( bHeader>=0 ){
        csv_errmsg(&sRdr, "more than one 'header' parameter");
        goto csvtab_connect_error;
      }
      x = csv_boolean(zValue);
      if( x==1 ){
        bHeader = 1;
      }else if( x==0 ){
        bHeader = 0;
      }else{
        csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
        goto csvtab_connect_error;
      }
    }else
#ifdef SQLITE_TEST
    if( (zValue = csv_parameter("testflags",9,z))!=0 ){
      tstFlags = (unsigned int)atoi(zValue);
    }else
#endif
    if( (zValue = csv_parameter("columns",7,z))!=0 ){
      if( nCol>0 ){
        csv_errmsg(&sRdr, "more than one 'columns' parameter");
        goto csvtab_connect_error;
      }
      nCol = atoi(zValue);
      if( nCol<=0 ){
        csv_errmsg(&sRdr, "must have at least one column");
        goto csvtab_connect_error;
      }
    }else
    {
      csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
      goto csvtab_connect_error;
    }
  }
  if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
    csv_errmsg(&sRdr, "must either filename= or data= but not both");
    goto csvtab_connect_error;
  }


  if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){

    goto csvtab_connect_error;
  }
  pNew = sqlite3_malloc( sizeof(*pNew) );
  *ppVtab = (sqlite3_vtab*)pNew;
  if( pNew==0 ) goto csvtab_connect_oom;
  memset(pNew, 0, sizeof(*pNew));


























  if( nCol>0 ){
    pNew->nCol = nCol;
  }else{











    do{
      csv_read_one_field(&sRdr);
      pNew->nCol++;
    }while( sRdr.cTerm==',' );


  }
  pNew->zFilename = CSV_FILENAME;  CSV_FILENAME = 0;
  pNew->zData = CSV_DATA;          CSV_DATA = 0;
#ifdef SQLITE_TEST
  pNew->tstFlags = tstFlags;
#endif

  pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
  csv_reader_reset(&sRdr);
  if( CSV_SCHEMA==0 ){
    char *zSep = "";
    CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
    if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
    for(i=0; i<pNew->nCol; i++){
      CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
      zSep = ",";
    }
    CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);

  }

  rc = sqlite3_declare_vtab(db, CSV_SCHEMA);


  if( rc ) goto csvtab_connect_error;

  for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
    sqlite3_free(azPValue[i]);
  }
  return SQLITE_OK;

csvtab_connect_oom:
  rc = SQLITE_NOMEM;







|
<




<
<
|
<
<
<
<
<
<













|




|




|


>
>
|
>






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
>
>
>
>
>
>
>
>
>
>
>




>
>






>
|
<
|
<
<
<
|
<
<
|
<
>

>

>
>
|
>







518
519
520
521
522
523
524
525

526
527
528
529


530






531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620

621



622


623

624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
    const char *zValue;
    for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
      if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
    }
    if( j<sizeof(azParam)/sizeof(azParam[0]) ){
      if( sRdr.zErr[0] ) goto csvtab_connect_error;
    }else
    if( csv_boolean_parameter("header",6,z,&b) ){

      if( bHeader>=0 ){
        csv_errmsg(&sRdr, "more than one 'header' parameter");
        goto csvtab_connect_error;
      }


      bHeader = b;






    }else
#ifdef SQLITE_TEST
    if( (zValue = csv_parameter("testflags",9,z))!=0 ){
      tstFlags = (unsigned int)atoi(zValue);
    }else
#endif
    if( (zValue = csv_parameter("columns",7,z))!=0 ){
      if( nCol>0 ){
        csv_errmsg(&sRdr, "more than one 'columns' parameter");
        goto csvtab_connect_error;
      }
      nCol = atoi(zValue);
      if( nCol<=0 ){
        csv_errmsg(&sRdr, "column= value must be positive");
        goto csvtab_connect_error;
      }
    }else
    {
      csv_errmsg(&sRdr, "bad parameter: '%s'", z);
      goto csvtab_connect_error;
    }
  }
  if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
    csv_errmsg(&sRdr, "must specify either filename= or data= but not both");
    goto csvtab_connect_error;
  }

  if( (nCol<=0 || bHeader==1)
   && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA)
  ){
    goto csvtab_connect_error;
  }
  pNew = sqlite3_malloc( sizeof(*pNew) );
  *ppVtab = (sqlite3_vtab*)pNew;
  if( pNew==0 ) goto csvtab_connect_oom;
  memset(pNew, 0, sizeof(*pNew));
  if( CSV_SCHEMA==0 ){
    sqlite3_str *pStr = sqlite3_str_new(0);
    char *zSep = "";
    int iCol = 0;
    sqlite3_str_appendf(pStr, "CREATE TABLE x(");
    if( nCol<0 && bHeader<1 ){
      nCol = 0;
      do{
        csv_read_one_field(&sRdr);
        nCol++;
      }while( sRdr.cTerm==',' );
    }
    if( nCol>0 && bHeader<1 ){
      for(iCol=0; iCol<nCol; iCol++){
        sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol);
        zSep = ",";
      }
    }else{
      do{
        char *z = csv_read_one_field(&sRdr);
        if( (nCol>0 && iCol<nCol) || (nCol<0 && bHeader) ){
          sqlite3_str_appendf(pStr,"%s\"%w\" TEXT", zSep, z);
          zSep = ",";
          iCol++;
        }
      }while( sRdr.cTerm==',' );
      if( nCol<0 ){
        nCol = iCol;
      }else{
        while( iCol<nCol ){
          sqlite3_str_appendf(pStr,"%sc%d TEXT", zSep, ++iCol);
          zSep = ",";
        }
      }
    }
    pNew->nCol = nCol;
    sqlite3_str_appendf(pStr, ")");
    CSV_SCHEMA = sqlite3_str_finish(pStr);
    if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
  }else if( nCol<0 ){
    do{
      csv_read_one_field(&sRdr);
      pNew->nCol++;
    }while( sRdr.cTerm==',' );
  }else{
    pNew->nCol = nCol;
  }
  pNew->zFilename = CSV_FILENAME;  CSV_FILENAME = 0;
  pNew->zData = CSV_DATA;          CSV_DATA = 0;
#ifdef SQLITE_TEST
  pNew->tstFlags = tstFlags;
#endif
  if( bHeader!=1 ){
    pNew->iStart = 0;

  }else if( pNew->zData ){



    pNew->iStart = (int)sRdr.iIn;


  }else{

    pNew->iStart = ftell(sRdr.in);
  }
  csv_reader_reset(&sRdr);
  rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
  if( rc ){
    csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db));
    goto csvtab_connect_error;
  }
  for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
    sqlite3_free(azPValue[i]);
  }
  return SQLITE_OK;

csvtab_connect_oom:
  rc = SQLITE_NOMEM;
Changes to test/csv01.test.
33
34
35
36
37
38
39

























































40
41
42
43
44
45
46
} {9 10 11 12}
do_execsql_test 1.1 {
  SELECT * FROM t1 WHERE c1='10';
} {9 10 11 12}
do_execsql_test 1.2 {
  SELECT rowid FROM t1;
} {1 2 3 4}


























































do_execsql_test 2.0 {
  DROP TABLE t1;
  CREATE VIRTUAL TABLE temp.t2 USING csv(
    data=
'1,2,3,4
5,6,7,8







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
} {9 10 11 12}
do_execsql_test 1.1 {
  SELECT * FROM t1 WHERE c1='10';
} {9 10 11 12}
do_execsql_test 1.2 {
  SELECT rowid FROM t1;
} {1 2 3 4}

do_execsql_test 1.3 {
  DROP TABLE temp.t1;
  CREATE VIRTUAL TABLE temp.t1 USING csv(
    data=
'a,b,"mix-bloom-eel","soft opinion"
1,2,3,4
5,6,7,8
9,10,11,12
13,14,15,16
',
    header=1
  );
  SELECT * FROM t1 WHERE "soft opinion"=12;
} {9 10 11 12}
do_execsql_test 1.4 {
  SELECT name FROM pragma_table_xinfo('t1');
} {a b mix-bloom-eel {soft opinion}}

do_execsql_test 1.5 {
  DROP TABLE temp.t1;
  CREATE VIRTUAL TABLE temp.t1 USING csv(
    data=
'a,b,"mix-bloom-eel","soft opinion"
1,2,3,4
5,6,7,8
9,10,11,12
13,14,15,16
',
    header=false
  );
  SELECT * FROM t1 WHERE c1='b';
} {a b mix-bloom-eel {soft opinion}}
do_execsql_test 1.6 {
  SELECT name FROM pragma_table_xinfo('t1');
} {c0 c1 c2 c3}

do_execsql_test 1.7 {
  DROP TABLE temp.t1;
  CREATE VIRTUAL TABLE temp.t1 USING csv(
    data=
'a,b,"mix-bloom-eel","soft opinion"
1,2,3,4
5,6,7,8
9,10,11,12
13,14,15,16
',
    header,
    schema='CREATE TABLE x(x0,x1,x2,x3,x4)',
    columns=5
  );
  SELECT * FROM t1 WHERE x1='6';
} {5 6 7 8 {}}
do_execsql_test 1.8 {
  SELECT name FROM pragma_table_xinfo('t1');
} {x0 x1 x2 x3 x4}


do_execsql_test 2.0 {
  DROP TABLE t1;
  CREATE VIRTUAL TABLE temp.t2 USING csv(
    data=
'1,2,3,4
5,6,7,8
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
9,10,11,12
13,14,15,16',
    columns=4,
    schema=
      'CREATE TABLE t3(a,b,c,d,PRIMARY KEY(a,b)) WITHOUT ROWID',
    testflags=1
  );
} {1 {vtable constructor failed: t4}}

# WITHOUT ROWID tables with a single-column PRIMARY KEY may be writable.
do_catchsql_test 4.1 {
  DROP TABLE IF EXISTS t4;
  CREATE VIRTUAL TABLE temp.t4 USING csv_wr(
    data=
'1,2,3,4







|







160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
9,10,11,12
13,14,15,16',
    columns=4,
    schema=
      'CREATE TABLE t3(a,b,c,d,PRIMARY KEY(a,b)) WITHOUT ROWID',
    testflags=1
  );
} {1 {bad schema: 'CREATE TABLE t3(a,b,c,d,PRIMARY KEY(a,b)) WITHOUT ROWID' - not an error}}

# WITHOUT ROWID tables with a single-column PRIMARY KEY may be writable.
do_catchsql_test 4.1 {
  DROP TABLE IF EXISTS t4;
  CREATE VIRTUAL TABLE temp.t4 USING csv_wr(
    data=
'1,2,3,4
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
      9,10,11,12
      13,14,15,16',
      columns=4,
      schema=
      'CREATE TABLE t3(a,b,c,d) WITHOUT ROWID',
      testflags=1
      );
} {1 {vtable constructor failed: t5}}

# 2018-04-24
# Memory leak reported on the sqlite-users mailing list by Ralf Junker.
#
do_catchsql_test 4.3 {
  CREATE VIRTUAL TABLE IF NOT EXISTS temp.t1
  USING csv(filename='FileDoesNotExist.csv');







|







191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
      9,10,11,12
      13,14,15,16',
      columns=4,
      schema=
      'CREATE TABLE t3(a,b,c,d) WITHOUT ROWID',
      testflags=1
      );
} {1 {bad schema: 'CREATE TABLE t3(a,b,c,d) WITHOUT ROWID' - PRIMARY KEY missing on table t3}}

# 2018-04-24
# Memory leak reported on the sqlite-users mailing list by Ralf Junker.
#
do_catchsql_test 4.3 {
  CREATE VIRTUAL TABLE IF NOT EXISTS temp.t1
  USING csv(filename='FileDoesNotExist.csv');