SQLite

View Ticket
Login
Ticket Hash: c25aab7e7ea55c861313076cae9257fc8f3fbdc0
Title: The ".import" command does not parse CSV correctly.
Status: Fixed Type: Feature_Request
Severity: Important Priority: Low
Subsystem: Shell Resolution: Fixed
Last Modified: 2014-03-11 13:17:03
Version Found In: 3.6.23.1
Description:
when importing CSV data, in text fields separated by double quotes, commas are treated as field separator.

live example:

770,"011",2002,2001247,3,4,"6/SOCJ,",1,0,20020131,122602,20020131,1,"FBLI"

in 7th field, which is "6/SOCJ," the comma inside the quotes is mis-interpreted as field separator, and application submits error:

FGLEDX line 136090: expected 14 columns of data but found 15

In case you need more information, please contact me at rafal@pies.pl.

Best regards,

Rafal Stanilewicz


drh added on 2009-08-13 15:19:32:
The ".import" function is in the command-line shell, which is a separate program that is independent of the SQLite core. I have retagged this ticket to indicate as much.

There is no such thing as a "CSV" standard. Different programs do different things. There might be users that depend on the current behavior. If we fix this for the OP, it could cause problems for others. So it is unclear whether or not we should do anything for this. We will simply leave the ticket open for the time being.


rogerb added on 2009-10-06 18:10:35:
See also http://www.sqlite.org/cvstrac/tktview?tn=3812 where the shell isn't correctly importing the same data it exported.

It would be reasonable to expect that whatever SQLite exports would import as exactly the same data.


anonymous claiming to be PM added on 2009-10-14 11:15:47:
See also old ticket http://www.sqlite.org/cvstrac/tktview?tn=3276. We still suffer from the inconsistency between the .import and export of the csv files. Although csv is not a real standard, at least you can expect consistent behaviour in sqlite (shell) itself.


rogerb added on 2009-10-27 20:00:06:
A request to tweak some of the behaviour of .import such as continuing on errors

http://www.sqlite.org/cvstrac/tktview?tn=1506


rogerb added on 2009-10-27 21:17:53:
Header rows with .import http://www.sqlite.org/cvstrac/tktview?tn=1313


anonymous added on 2010-01-06 15:37:44:
There is a well-defined standard for CSV files of mime type text/csv. It can be found at:

http://tools.ietf.org/html/rfc4180


anonymous added on 2010-05-12 17:53:05:
Well, I need to import and export files from/to OpenOffice.org Calc (for example).

I propose these modifications to make sqlite3 more easy to use (export/import with ".mode csv").

In "src/shell.c" Version 3.6.23.1 :


@@ -40,11 +40,11 @@

#if defined(HAVE_READLINE) && HAVE_READLINE==1 # include <readline/readline.h> # include <readline/history.h> #else -# define readline(p) local_getline(p,stdin) +# define readline(p) local_getline(p,stdin,0) # define add_history(X) # define read_history(X) # define write_history(X) # define stifle_history(X) #endif @@ -315,15 +315,16 @@ ** fails. ** ** The interface is like "readline" but no command-line editing ** is done. */ -static char *local_getline(char *zPrompt, FILE *in){ +static char *local_getline(char *zPrompt, FILE *in, const int bCsvMode){ char *zLine; int nLine; int n; int eol; + int bEscaped;

if( zPrompt && *zPrompt ){ printf("%s",zPrompt); fflush(stdout); } @@ -330,10 +331,11 @@ nLine = 100; zLine = malloc( nLine ); if( zLine==0 ) return 0; n = 0; eol = 0; + bEscaped = 0; while( !eol ){ if( n+100>nLine ){ nLine = nLine*2 + 100; zLine = realloc(zLine, nLine); if( zLine==0 ) return 0; @@ -345,12 +347,15 @@ } zLine[n] = 0; eol = 1; break; } - while( zLine[n] ){ n++; } - if( n>0 && zLine[n-1]=='\n' ){ + while( zLine[n] ){ + if( zLine[n]=='"' && bCsvMode ) bEscaped = 1 - bEscaped; + n++; + } + if( !bEscaped && n>0 && zLine[n-1]=='\n' ){ n--; if( n>0 && zLine[n-1]=='\r' ) n--; zLine[n] = 0; eol = 1; } @@ -367,11 +372,11 @@ */ static char *one_input_line(const char *zPrior, FILE *in){ char *zPrompt; char *zResult; if( in!=0 ){ - return local_getline(0, in); + return local_getline(0, in, 0); } if( zPrior && zPrior[0] ){ zPrompt = continuePrompt; }else{ zPrompt = mainPrompt; @@ -1625,25 +1630,45 @@ sqlite3_finalize(pStmt); return 1; } sqlite3_exec(p->db, "BEGIN", 0, 0, 0); zCommit = "COMMIT"; - while( (zLine = local_getline(0, in))!=0 ){ + while( (zLine = local_getline(0, in, 1))!=0 ){ char *z; + char *q; + int bEscaped; + int bFieldQuoted; + bEscaped = 0; /* to escape quotes */ i = 0; lineno++; - azCol[0] = zLine; - for(i=0, z=zLine; *z && *z!='\n' && *z!='\r'; z++){ - if( *z==p->separator[0] && strncmp(z, p->separator, nSep)==0 ){ + if( *zLine=='"' ){ + azCol[0] = zLine + 1; /* ignore the first quote */ + bFieldQuoted = 1; + }else{ + azCol[0] = zLine; + bFieldQuoted = 0; + } + for(i=0, z=zLine, q=zLine; *z ; z++){ + if( *z=='"' ) bEscaped = 1 - bEscaped; + if( bEscaped==0 && *z==p->separator[0] && strncmp(z, p->separator, nSep)==0 ){ + if( bFieldQuoted==1 && *q=='"') *q = 0; /* ignore the last quote */ *z = 0; i++; if( i<nCol ){ azCol[i] = &z[nSep]; + if( *azCol[i]=='"' ){ + azCol[i]++; /* ignore the first quote */ + bFieldQuoted = 1; + }else{ + bFieldQuoted = 0; + } z += nSep-1; } } + q = z; } /* end for */ + if( bEscaped==0 && bFieldQuoted==1 && *q=='"' ) *q = 0; /* ignore the last quote */ *z = 0; if( i+1!=nCol ){ fprintf(stderr, "Error: %s line %d: expected %d columns of data but found %d\n", zFile, lineno, nCol, i+1); @@ -1651,10 +1676,16 @@ free(zLine); rc = 1; break; /* from while */ } for(i=0; i<nCol; i++){ + /* remove double quotes */ + for( bEscaped=0, z=azCol[i], q=azCol[i]; *z ; z++, q++){ + if( *z=='"' ) z++; + *q = *z; + } + *q = 0; sqlite3_bind_text(pStmt, i+1, azCol[i], -1, SQLITE_STATIC); } sqlite3_step(pStmt); rc = sqlite3_reset(pStmt); free(zLine);