/ Check-in [7c15d762]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:The ".import" command of the shell, and the csv virtual table extension both ignore a single UTF-8 BOM at the beginning of their input.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256:7c15d762d99c2e3e534cd35dfe25ddcd317637eb1f2655fd24c2dd5f9d5a7613
User & Date: drh 2017-06-26 18:42:23
Context
2017-06-26
21:08
Add the -withoutnulls option to the "db eval" method in the TCL interface. check-in: 18f0616e user: drh tags: trunk
18:42
The ".import" command of the shell, and the csv virtual table extension both ignore a single UTF-8 BOM at the beginning of their input. check-in: 7c15d762 user: drh tags: trunk
2017-06-24
19:21
Disable shell tests for the .schema command if virtual tables are not available. check-in: c8186874 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/misc/csv.c.

73
74
75
76
77
78
79

80
81
82
83
84
85
86
..
87
88
89
90
91
92
93

94
95
96
97
98
99
100
...
247
248
249
250
251
252
253















254
255
256
257
258
259
260
261
262
263
264

265
266
267
268
269
270
271
/* A CsvReader object holds the state needed to read CSV text from an
** input stream: the stream itself, the buffer in which the text of the
** current field is accumulated, the buffered raw input, and the most
** recent error message.
*/
typedef struct CsvReader CsvReader;
struct CsvReader {
  FILE *in;              /* Read the CSV text from this input stream */
  char *z;               /* Accumulated text for a field */
  int n;                 /* Number of bytes in z */
  int nAlloc;            /* Space allocated for z[] */
  int nLine;             /* Current line number.  Incremented each time a
                         ** field is terminated by '\n' */

  char cTerm;            /* Character that terminated the most recent field */
  size_t iIn;            /* Next unread character in the input buffer */
  size_t nIn;            /* Number of characters in the input buffer */
  char *zIn;             /* The input buffer */
  char zErr[CSV_MXERR];  /* Error message (buffer of CSV_MXERR bytes) */
};

................................................................................
/* Put a CsvReader object into its initial, empty state: no input stream,
** no field buffer, no input buffer, and an empty error message.
*/
static void csv_reader_init(CsvReader *p){
  /* Nothing is open or allocated yet, so every pointer is null */
  p->zIn = 0;
  p->z = 0;
  p->in = 0;

  /* All counters start from zero */
  p->nLine = p->nAlloc = p->n = 0;
  p->nIn = 0;

  /* Start with an empty error message */
  p->zErr[0] = '\0';
}

/* Close and reset a CsvReader object */
static void csv_reader_reset(CsvReader *p){
................................................................................
        }
      }
      if( csv_append(p, (char)c) ) return 0;
      ppc = pc;
      pc = c;
    }
  }else{















    while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
      if( csv_append(p, (char)c) ) return 0;
      c = csv_getc(p);
    }
    if( c=='\n' ){
      p->nLine++;
      if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
    }
    p->cTerm = (char)c;
  }
  if( p->z ) p->z[p->n] = 0;

  return p->z;
}


/* Forward references to the various virtual table methods implemented
** in this file. */
static int csvtabCreate(sqlite3*, void*, int, const char*const*, 







>







 







>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>











>







73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
..
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
...
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
/* A CsvReader object holds the state needed to read CSV text from an
** input stream: the stream itself, the buffer in which the text of the
** current field is accumulated, the buffered raw input, and the most
** recent error message.
*/
typedef struct CsvReader CsvReader;
struct CsvReader {
  FILE *in;              /* Read the CSV text from this input stream */
  char *z;               /* Accumulated text for a field */
  int n;                 /* Number of bytes in z */
  int nAlloc;            /* Space allocated for z[] */
  int nLine;             /* Current line number.  Incremented each time a
                         ** field is terminated by '\n' */
  int bNotFirst;         /* True if prior text has been seen.  While this
                         ** is zero, a UTF-8 BOM (0xEF 0xBB 0xBF) at the
                         ** start of an unquoted field is skipped.  Set to
                         ** 1 once a BOM has been skipped or a field has
                         ** been read. */
  char cTerm;            /* Character that terminated the most recent field */
  size_t iIn;            /* Next unread character in the input buffer */
  size_t nIn;            /* Number of characters in the input buffer */
  char *zIn;             /* The input buffer */
  char zErr[CSV_MXERR];  /* Error message (buffer of CSV_MXERR bytes) */
};

................................................................................
/* Put a CsvReader object into its initial, empty state: no input stream,
** no field buffer, no input buffer, no text seen yet, and an empty error
** message.
*/
static void csv_reader_init(CsvReader *p){
  /* Nothing is open or allocated yet, so every pointer is null */
  p->zIn = 0;
  p->z = 0;
  p->in = 0;

  /* All counters start from zero, and no input has been seen so far */
  p->bNotFirst = p->nLine = p->nAlloc = p->n = 0;
  p->nIn = 0;

  /* Start with an empty error message */
  p->zErr[0] = '\0';
}

/* Close and reset a CsvReader object */
static void csv_reader_reset(CsvReader *p){
................................................................................
        }
      }
      if( csv_append(p, (char)c) ) return 0;
      ppc = pc;
      pc = c;
    }
  }else{
    /* If this is the first field being parsed and it begins with the
    ** UTF-8 BOM  (0xEF BB BF) then skip the BOM */
    if( (c&0xff)==0xef && p->bNotFirst==0 ){
      csv_append(p, c);
      c = csv_getc(p);
      if( (c&0xff)==0xbb ){
        csv_append(p, c);
        c = csv_getc(p);
        if( (c&0xff)==0xbf ){
          p->bNotFirst = 1;
          p->n = 0;
          return csv_read_one_field(p);
        }
      }
    }
    while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
      if( csv_append(p, (char)c) ) return 0;
      c = csv_getc(p);
    }
    if( c=='\n' ){
      p->nLine++;
      if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
    }
    p->cTerm = (char)c;
  }
  if( p->z ) p->z[p->n] = 0;
  p->bNotFirst = 1;
  return p->z;
}


/* Forward references to the various virtual table methods implemented
** in this file. */
static int csvtabCreate(sqlite3*, void*, int, const char*const*, 

Changes to src/shell.c.

3818
3819
3820
3821
3822
3823
3824

3825
3826
3827
3828
3829
3830
3831
....
3897
3898
3899
3900
3901
3902
3903















3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914

3915
3916
3917
3918
3919
3920
3921
/* An ImportCtx object holds the state used while reading fields of
** delimited text from an input file: the file, the buffer in which the
** current field is accumulated, and the separator characters in use.
*/
struct ImportCtx {
  const char *zFile;  /* Name of the input file */
  FILE *in;           /* Read the CSV text from this input stream */
  char *z;            /* Accumulated text for a field */
  int n;              /* Number of bytes in z */
  int nAlloc;         /* Space allocated for z[] */
  int nLine;          /* Current line number.  Incremented each time a
                      ** field is terminated by the row separator */

  int cTerm;          /* Character that terminated the most recent field */
  int cColSep;        /* The column separator character.  (Usually ",") */
  int cRowSep;        /* The row separator character.  (Usually "\n") */
};

/* Append a single byte to z[] */
static void import_append_char(ImportCtx *p, int c){
................................................................................
        break;
      }
      import_append_char(p, c);
      ppc = pc;
      pc = c;
    }
  }else{















    while( c!=EOF && c!=cSep && c!=rSep ){
      import_append_char(p, c);
      c = fgetc(p->in);
    }
    if( c==rSep ){
      p->nLine++;
      if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
    }
    p->cTerm = c;
  }
  if( p->z ) p->z[p->n] = 0;

  return p->z;
}

/* Read a single field of ASCII delimited text.
**
**   +  Input comes from p->in.
**   +  Store results in p->z of length p->n.  Space to hold p->z comes







>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>











>







3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
....
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
/* An ImportCtx object holds the state used while reading fields of
** delimited text from an input file: the file, the buffer in which the
** current field is accumulated, and the separator characters in use.
*/
struct ImportCtx {
  const char *zFile;  /* Name of the input file */
  FILE *in;           /* Read the CSV text from this input stream */
  char *z;            /* Accumulated text for a field */
  int n;              /* Number of bytes in z */
  int nAlloc;         /* Space allocated for z[] */
  int nLine;          /* Current line number.  Incremented each time a
                      ** field is terminated by the row separator */
  int bNotFirst;      /* True if one or more bytes already read.  While
                      ** this is zero, a UTF-8 BOM (0xEF 0xBB 0xBF) at the
                      ** start of an unquoted field is skipped.  Set to 1
                      ** once a BOM has been skipped or a field has been
                      ** read. */
  int cTerm;          /* Character that terminated the most recent field */
  int cColSep;        /* The column separator character.  (Usually ",") */
  int cRowSep;        /* The row separator character.  (Usually "\n") */
};

/* Append a single byte to z[] */
static void import_append_char(ImportCtx *p, int c){
................................................................................
        break;
      }
      import_append_char(p, c);
      ppc = pc;
      pc = c;
    }
  }else{
    /* If this is the first field being parsed and it begins with the
    ** UTF-8 BOM  (0xEF BB BF) then skip the BOM */
    if( (c&0xff)==0xef && p->bNotFirst==0 ){
      import_append_char(p, c);
      c = fgetc(p->in);
      if( (c&0xff)==0xbb ){
        import_append_char(p, c);
        c = fgetc(p->in);
        if( (c&0xff)==0xbf ){
          p->bNotFirst = 1;
          p->n = 0;
          return csv_read_one_field(p);
        }
      }
    }
    while( c!=EOF && c!=cSep && c!=rSep ){
      import_append_char(p, c);
      c = fgetc(p->in);
    }
    if( c==rSep ){
      p->nLine++;
      if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
    }
    p->cTerm = c;
  }
  if( p->z ) p->z[p->n] = 0;
  p->bNotFirst = 1;
  return p->z;
}

/* Read a single field of ASCII delimited text.
**
**   +  Input comes from p->in.
**   +  Store results in p->z of length p->n.  Space to hold p->z comes

Changes to test/shell5.test.

179
180
181
182
183
184
185






























186
187
188
189
190
191
192
...
206
207
208
209
210
211
212

213
214
215
216
217
218
219
220
  set res [catchcmd "test.db" {.import shell5.csv t1
SELECT COUNT(*) FROM t1;}]
} {0 7}

# Query column b of the t1 row whose a is '7' and compare against the
# expected pair {0 <text>}.  The leading 0 is presumably the success code
# reported by catchcmd -- confirm against its definition.
do_test shell5-1.4.10.2 {
  catchcmd "test.db" {SELECT b FROM t1 WHERE a='7';}
} {0 {Now is the time for all good men to come to the aid of their country.}}































# check importing very long field
do_test shell5-1.5.1 {
  set str [string repeat X 999]
  set in [open shell5.csv w]
  puts $in "8|$str"
  close $in
................................................................................
  for {set i 1} {$i<$cols} {incr i} {
    append data "$i|"
  }
  append data "$cols"
  set in [open shell5.csv w]
  puts $in $data
  close $in

  set res [catchcmd "test.db" {.import shell5.csv t2
SELECT COUNT(*) FROM t2;}]
} {0 1}

# try importing a large number of rows
set rows 9999
do_test shell5-1.7.1 {
  set in [open shell5.csv w]







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
|







179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
...
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
  set res [catchcmd "test.db" {.import shell5.csv t1
SELECT COUNT(*) FROM t1;}]
} {0 7}

# Query column b of the t1 row whose a is '7' and compare against the
# expected pair {0 <text>}.  The leading 0 is presumably the success code
# reported by catchcmd -- confirm against its definition.
do_test shell5-1.4.10.2 {
  catchcmd "test.db" {SELECT b FROM t1 WHERE a='7';}
} {0 {Now is the time for all good men to come to the aid of their country.}}

# import file with 2 rows, 2 columns and an initial BOM
#
# The file starts with the UTF-8 BOM bytes 0xEF 0xBB 0xBF.  The expected
# result shows the first imported value as a plain 2, i.e. the BOM must
# not end up in the data.
#
# Line endings in the shell output are folded to "|" before comparing.
# The CRLF key is listed ahead of the bare LF key because [string map]
# uses the first key that matches at each position; a longer key placed
# after "\n" could never match.
do_test shell5-1.4.11 {
  set in [open shell5.csv wb]
  puts $in "\xef\xbb\xbf2|3"
  puts $in "4|5"
  close $in
  set res [catchcmd "test.db" {CREATE TABLE t2(x INT, y INT);
.import shell5.csv t2
.mode quote
.header on
SELECT * FROM t2;}]
  string map {\r\n | \n |} $res
} {0 {'x','y'|2,3|4,5}}

# import file with 2 rows and 2 columns, where the first field is quoted
# text preceded by an initial BOM
#
# The file starts with the UTF-8 BOM bytes 0xEF 0xBB 0xBF, immediately
# followed by the quoted field "two".  The expected result shows the first
# imported value as 'two', i.e. the BOM must not end up in the data and
# the quotes must still be recognized.
#
# Line endings in the shell output are folded to "|" before comparing.
# The CRLF key is listed ahead of the bare LF key because [string map]
# uses the first key that matches at each position; a longer key placed
# after "\n" could never match.
do_test shell5-1.4.12 {
  set in [open shell5.csv wb]
  puts $in "\xef\xbb\xbf\"two\"|3"
  puts $in "4|5"
  close $in
  set res [catchcmd "test.db" {DELETE FROM t2;
.import shell5.csv t2
.mode quote
.header on
SELECT * FROM t2;}]
  string map {\r\n | \n |} $res
} {0 {'x','y'|'two',3|4,5}}

# check importing very long field
do_test shell5-1.5.1 {
  set str [string repeat X 999]
  set in [open shell5.csv w]
  puts $in "8|$str"
  close $in
................................................................................
  for {set i 1} {$i<$cols} {incr i} {
    append data "$i|"
  }
  append data "$cols"
  set in [open shell5.csv w]
  puts $in $data
  close $in
  set res [catchcmd "test.db" {DROP TABLE IF EXISTS t2;
.import shell5.csv t2
SELECT COUNT(*) FROM t2;}]
} {0 1}

# try importing a large number of rows
set rows 9999
do_test shell5-1.7.1 {
  set in [open shell5.csv w]