Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Add the data= parameter to the CSV virtual table extension. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | without-rowid-vtab |
Files: | files | file ages | folders |
SHA1: | 769191042aa14e6eccdfe2391fc10111 |
User & Date: | drh 2016-06-02 17:44:24.492 |
Context
2016-06-02
| ||
23:13 | Add the CSV extension to the test fixture. Fix a memory leak in the CSV extension. Add test cases for the CSV extension, including one that uses a WITHOUT ROWID virtual table participating in the OR optimization. (check-in: 95f483e86e user: drh tags: without-rowid-vtab) | |
17:44 | Add the data= parameter to the CSV virtual table extension. (check-in: 769191042a user: drh tags: without-rowid-vtab) | |
16:22 | Fix corner cases in the WITHOUT ROWID virtual table logic. (check-in: a393bbb972 user: drh tags: without-rowid-vtab) | |
Changes
Changes to ext/misc/csv.c.
︙ | ︙ | |||
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | #else # define CSV_NOINLINE #endif /* Max size of the error message in a CsvReader */ #define CSV_MXERR 200 /* A context object used when read a CSV file. */ typedef struct CsvReader CsvReader; struct CsvReader { FILE *in; /* Read the CSV text from this input stream */ char *z; /* Accumulated text for a field */ int n; /* Number of bytes in z */ int nAlloc; /* Space allocated for z[] */ int nLine; /* Current line number */ | > > > | > > > | > > > > > > > | > > > | > > > > > > > > > > | | > | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | #else # define CSV_NOINLINE #endif /* Max size of the error message in a CsvReader */ #define CSV_MXERR 200 /* Size of the CsvReader input buffer */ #define CSV_INBUFSZ 1024 /* A context object used when read a CSV file. 
*/ typedef struct CsvReader CsvReader; struct CsvReader { FILE *in; /* Read the CSV text from this input stream */ char *z; /* Accumulated text for a field */ int n; /* Number of bytes in z */ int nAlloc; /* Space allocated for z[] */ int nLine; /* Current line number */ char cTerm; /* Character that terminated the most recent field */ size_t iIn; /* Next unread character in the input buffer */ size_t nIn; /* Number of characters in the input buffer */ char *zIn; /* The input buffer */ char zErr[CSV_MXERR]; /* Error message */ }; /* Initialize a CsvReader object */ static void csv_reader_init(CsvReader *p){ p->in = 0; p->z = 0; p->n = 0; p->nAlloc = 0; p->nLine = 0; p->nIn = 0; p->zIn = 0; p->zErr[0] = 0; } /* Close and reset a CsvReader object */ static void csv_reader_reset(CsvReader *p){ if( p->in ){ fclose(p->in); sqlite3_free(p->zIn); } sqlite3_free(p->z); csv_reader_init(p); } /* Report an error on a CsvReader */ static void csv_errmsg(CsvReader *p, const char *zFormat, ...){ va_list ap; va_start(ap, zFormat); sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap); va_end(ap); } /* Open the file associated with a CsvReader ** Return the number of errors. */ static int csv_reader_open( CsvReader *p, /* The reader to open */ const char *zFilename, /* Read from this filename */ const char *zData /* ... or use this data */ ){ if( zFilename ){ p->zIn = sqlite3_malloc( CSV_INBUFSZ ); if( p->zIn==0 ){ csv_errmsg(p, "out of memory"); return 1; } p->in = fopen(zFilename, "rb"); if( p->in==0 ){ csv_reader_reset(p); csv_errmsg(p, "cannot open '%s' for reading", zFilename); return 1; } }else{ assert( p->in==0 ); p->zIn = (char*)zData; p->nIn = strlen(zData); } return 0; } /* The input buffer has overflowed. 
Refill the input buffer, then ** return the next character */ static CSV_NOINLINE int csv_getc_refill(CsvReader *p){ size_t got; assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */ assert( p->in!=0 ); /* Only called if reading froma file */ got = fread(p->zIn, 1, CSV_INBUFSZ, p->in); if( got==0 ) return EOF; p->nIn = got; p->iIn = 1; return p->zIn[0]; } /* Return the next character of input. Return EOF at end of input. */ static int csv_getc(CsvReader *p){ if( p->iIn >= p->nIn ){ if( p->in!=0 ) return csv_getc_refill(p); return EOF; } return p->zIn[p->iIn++]; } /* Increase the size of p->z and append character c to the end. ** Return 0 on success and non-zero if there is an OOM error */ static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){ char *zNew; int nNew = p->nAlloc*2 + 100; zNew = sqlite3_realloc64(p->z, nNew); |
︙ | ︙ | |||
133 134 135 136 137 138 139 | ** EOF on end-of-file. ** ** Return "" at EOF. Return 0 on an OOM error. */ static char *csv_read_one_field(CsvReader *p){ int c; p->n = 0; | | | | 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 | ** EOF on end-of-file. ** ** Return "" at EOF. Return 0 on an OOM error. */ static char *csv_read_one_field(CsvReader *p){ int c; p->n = 0; c = csv_getc(p); if( c==EOF ){ p->cTerm = EOF; return ""; } if( c=='"' ){ int pc, ppc; int startLine = p->nLine; int cQuote = c; pc = ppc = 0; while( 1 ){ c = csv_getc(p); if( c=='\n' ) p->nLine++; if( c==cQuote ){ if( pc==cQuote ){ pc = 0; continue; } } |
︙ | ︙ | |||
178 179 180 181 182 183 184 | if( csv_append(p, (char)c) ) return 0; ppc = pc; pc = c; } }else{ while( c!=EOF && c!=',' && c!='\n' ){ if( csv_append(p, (char)c) ) return 0; | | | 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 | if( csv_append(p, (char)c) ) return 0; ppc = pc; pc = c; } }else{ while( c!=EOF && c!=',' && c!='\n' ){ if( csv_append(p, (char)c) ) return 0; c = csv_getc(p); } if( c=='\n' ){ p->nLine++; if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; } p->cTerm = c; } |
︙ | ︙ | |||
212 213 214 215 216 217 218 219 220 221 222 223 224 225 | static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); /* An instance of the CSV virtual table */ typedef struct CsvTable { sqlite3_vtab base; /* Base class. Must be first */ char *zFilename; /* Name of the CSV file */ long iStart; /* Offset to start of data in zFilename */ int nCol; /* Number of columns in the CSV file */ unsigned int tstFlags; /* Bit values used for testing */ } CsvTable; /* Allowed values for tstFlags */ #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/ | > | 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 | static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); /* An instance of the CSV virtual table */ typedef struct CsvTable { sqlite3_vtab base; /* Base class. Must be first */ char *zFilename; /* Name of the CSV file */ char *zData; /* Raw CSV data in lieu of zFilename */ long iStart; /* Offset to start of data in zFilename */ int nCol; /* Number of columns in the CSV file */ unsigned int tstFlags; /* Bit values used for testing */ } CsvTable; /* Allowed values for tstFlags */ #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/ |
︙ | ︙ | |||
285 286 287 288 289 290 291 292 293 294 295 296 297 298 | static const char *csv_parameter(const char *zTag, int nTag, const char *z){ z = csv_skip_whitespace(z); if( strncmp(zTag, z, nTag)!=0 ) return 0; z = csv_skip_whitespace(z+nTag); if( z[0]!='=' ) return 0; return csv_skip_whitespace(z+1); } /* Return 0 if the argument is false and 1 if it is true. Return -1 if ** we cannot really tell. */ static int csv_boolean(const char *z){ if( sqlite3_stricmp("yes",z)==0 || sqlite3_stricmp("on",z)==0 | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 | static const char *csv_parameter(const char *zTag, int nTag, const char *z){ z = csv_skip_whitespace(z); if( strncmp(zTag, z, nTag)!=0 ) return 0; z = csv_skip_whitespace(z+nTag); if( z[0]!='=' ) return 0; return csv_skip_whitespace(z+1); } /* Decode a parameter that requires a dequoted string. ** ** Return 1 if the parameter is seen, or 0 if not. 1 is returned ** even if there is an error. If an error occurs, then an error message ** is left in p->zErr. If there are no errors, p->zErr[0]==0. */ static int csv_string_parameter( CsvReader *p, /* Leave the error message here, if there is one */ const char *zParam, /* Parameter we are checking for */ const char *zArg, /* Raw text of the virtual table argment */ char **pzVal /* Write the dequoted string value here */ ){ const char *zValue; zValue = csv_parameter(zParam,strlen(zParam),zArg); if( zValue==0 ) return 0; p->zErr[0] = 0; if( *pzVal ){ csv_errmsg(p, "more than one '%s' parameter", zParam); return 1; } *pzVal = sqlite3_mprintf("%s", zValue); if( *pzVal==0 ){ csv_errmsg(p, "out of memory"); return 1; } csv_trim_whitespace(*pzVal); csv_dequote(*pzVal); return 1; } /* Return 0 if the argument is false and 1 if it is true. Return -1 if ** we cannot really tell. 
*/ static int csv_boolean(const char *z){ if( sqlite3_stricmp("yes",z)==0 || sqlite3_stricmp("on",z)==0 |
︙ | ︙ | |||
310 311 312 313 314 315 316 | } return -1; } /* ** Parameters: | | > | | < < | > > > > > > > > > > > | < < < | < < < < < < | < | < < < < < | 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 | } return -1; } /* ** Parameters: ** filename=FILENAME Name of file containing CSV content ** data=TEXT Direct CSV content. ** schema=SCHEMA Alternative CSV schema. ** header=YES|NO First row of CSV defines the names of ** columns if "yes". Default "no". ** columns=N Assume the CSV file contains N columns. ** testflags=N Bitmask of test flags. Optional ** ** If schema= is omitted, then the columns are named "c0", "c1", "c2", ** and so forth. If columns=N is omitted, then the file is opened and ** the number of columns in the first row is counted to determine the ** column count. If header=YES, then the first row is skipped. */ static int csvtabConnect( sqlite3 *db, void *pAux, int argc, const char *const*argv, sqlite3_vtab **ppVtab, char **pzErr ){ CsvTable *pNew = 0; /* The CsvTable object to construct */ int bHeader = -1; /* header= flags. 
-1 means not seen yet */ int rc = SQLITE_OK; /* Result code from this routine */ int i, j; /* Loop counters */ int tstFlags = 0; /* Value for testflags=N parameter */ int nCol = -99; /* Value of the columns= parameter */ CsvReader sRdr; /* A CSV file reader used to store an error ** message and/or to count the number of columns */ static const char *azParam[] = { "filename", "data", "schema", }; char *azPValue[3]; /* Parameter values */ # define CSV_FILENAME (azPValue[0]) # define CSV_DATA (azPValue[1]) # define CSV_SCHEMA (azPValue[2]) assert( sizeof(azPValue)==sizeof(azParam) ); memset(&sRdr, 0, sizeof(sRdr)); memset(azPValue, 0, sizeof(azPValue)); for(i=3; i<argc; i++){ const char *z = argv[i]; const char *zValue; for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){ if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break; } if( j<sizeof(azParam)/sizeof(azParam[0]) ){ if( sRdr.zErr[0] ) goto csvtab_connect_error; }else if( (zValue = csv_parameter("header",6,z))!=0 ){ int x; if( bHeader>=0 ){ csv_errmsg(&sRdr, "more than one 'header' parameter"); goto csvtab_connect_error; } |
︙ | ︙ | |||
399 400 401 402 403 404 405 | } }else { csv_errmsg(&sRdr, "unrecognized parameter '%s'", z); goto csvtab_connect_error; } } | | | | | > < | | | | | | > | > | | > | 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 | } }else { csv_errmsg(&sRdr, "unrecognized parameter '%s'", z); goto csvtab_connect_error; } } if( (CSV_FILENAME==0)==(CSV_DATA==0) ){ csv_errmsg(&sRdr, "must either filename= or data= but not both"); goto csvtab_connect_error; } if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){ goto csvtab_connect_error; } pNew = sqlite3_malloc( sizeof(*pNew) ); *ppVtab = (sqlite3_vtab*)pNew; if( pNew==0 ) goto csvtab_connect_oom; memset(pNew, 0, sizeof(*pNew)); if( nCol>0 ){ pNew->nCol = nCol; }else{ do{ const char *z = csv_read_one_field(&sRdr); if( z==0 ) goto csvtab_connect_oom; pNew->nCol++; }while( sRdr.cTerm==',' ); } pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0; pNew->zData = CSV_DATA; CSV_DATA = 0; pNew->tstFlags = tstFlags; pNew->iStart = bHeader==1 ? 
ftell(sRdr.in) : 0; csv_reader_reset(&sRdr); if( CSV_SCHEMA==0 ){ char *zSep = ""; CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x("); if( CSV_SCHEMA==0 ) goto csvtab_connect_oom; for(i=0; i<pNew->nCol; i++){ CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i); zSep = ","; } CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA); } rc = sqlite3_declare_vtab(db, CSV_SCHEMA); if( rc ) goto csvtab_connect_error; for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ sqlite3_free(azPValue[i]); } return SQLITE_OK; csvtab_connect_oom: rc = SQLITE_NOMEM; csv_errmsg(&sRdr, "out of memory"); csvtab_connect_error: if( pNew ) csvtabDisconnect(&pNew->base); for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ sqlite3_free(azPValue[i]); } if( sRdr.zErr[0] ){ sqlite3_free(*pzErr); *pzErr = sqlite3_mprintf("%s", sRdr.zErr); } csv_reader_reset(&sRdr); if( rc==SQLITE_OK ) rc = SQLITE_ERROR; return rc; |
︙ | ︙ | |||
504 505 506 507 508 509 510 | CsvTable *pTab = (CsvTable*)p; CsvCursor *pCur; pCur = sqlite3_malloc( sizeof(*pCur) * sizeof(char*)*pTab->nCol ); if( pCur==0 ) return SQLITE_NOMEM; memset(pCur, 0, sizeof(*pCur) + sizeof(char*)*pTab->nCol ); pCur->azVal = (char**)&pCur[1]; *ppCursor = &pCur->base; | | | 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 | CsvTable *pTab = (CsvTable*)p; CsvCursor *pCur; pCur = sqlite3_malloc( sizeof(*pCur) * sizeof(char*)*pTab->nCol ); if( pCur==0 ) return SQLITE_NOMEM; memset(pCur, 0, sizeof(*pCur) + sizeof(char*)*pTab->nCol ); pCur->azVal = (char**)&pCur[1]; *ppCursor = &pCur->base; if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){ csv_xfer_error(pTab, &pCur->rdr); return SQLITE_ERROR; } return SQLITE_OK; } |
︙ | ︙ | |||
593 594 595 596 597 598 599 | sqlite3_vtab_cursor *pVtabCursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv ){ CsvCursor *pCur = (CsvCursor*)pVtabCursor; CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; pCur->iRowid = 0; | > > > > > | > > > | | | | > > > > > > > | 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 | sqlite3_vtab_cursor *pVtabCursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv ){ CsvCursor *pCur = (CsvCursor*)pVtabCursor; CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; pCur->iRowid = 0; if( pCur->rdr.in==0 ){ assert( pCur->rdr.zIn==pTab->zData ); assert( pTab->iStart<=pCur->rdr.nIn ); pCur->rdr.iIn = pTab->iStart; }else{ fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); pCur->rdr.iIn = 0; pCur->rdr.nIn = 0; } return csvtabNext(pVtabCursor); } /* ** Only a forward full table scan is supported. xBestIndex is mostly ** a no-op. If CSVTEST_FIDX is set, then the presence of equality ** constraints lowers the estimated cost, which is fiction, but is useful ** for testing certain kinds of virtual table behavior. */ static int csvtabBestIndex( sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo ){ CsvTable *pTab = (CsvTable*)tab; int i; int nConst = 0; pIdxInfo->estimatedCost = 1000000; if( (pTab->tstFlags & CSVTEST_FIDX)==0 ){ return SQLITE_OK; } /* The usual (and sensible) case is to take the "return SQLITE_OK" above. ** The code below only runs when testflags=1. The code below ** generates an artifical and unrealistic plan which is useful ** for testing virtual table logic but is not helpfulto real applications. ** ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual ** table (even though it is not) and the cost of running the virtual table ** is reduced from 1 million to just 10. 
The constraints are *not* marked ** as omittable, however, so the query planner should still generate a ** plan that gives a correct answer, even if they plan is not optimal. */ for(i=0; i<pIdxInfo->nConstraint; i++){ unsigned char op; if( pIdxInfo->aConstraint[i].usable==0 ) continue; op = pIdxInfo->aConstraint[i].op; if( op==SQLITE_INDEX_CONSTRAINT_EQ || op==SQLITE_INDEX_CONSTRAINT_LIKE || op==SQLITE_INDEX_CONSTRAINT_GLOB |
︙ | ︙ |