Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -4586,10 +4586,11 @@ } } if( pIdxKey ){ xRecordCompare = sqlite3VdbeFindCompare(pIdxKey); + pIdxKey->isCorrupt = 0; assert( pIdxKey->default_rc==1 || pIdxKey->default_rc==0 || pIdxKey->default_rc==-1 ); }else{ @@ -4709,19 +4710,21 @@ goto moveto_finish; } c = xRecordCompare(nCell, pCellKey, pIdxKey, 0); sqlite3_free(pCellKey); } + assert( pIdxKey->isCorrupt==0 || c==0 ); if( c<0 ){ lwr = idx+1; }else if( c>0 ){ upr = idx-1; }else{ assert( c==0 ); *pRes = 0; rc = SQLITE_OK; pCur->aiIdx[pCur->iPage] = (u16)idx; + if( pIdxKey->isCorrupt ) rc = SQLITE_CORRUPT; goto moveto_finish; } if( lwr>upr ) break; assert( lwr+upr>=0 ); idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2 */ Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -1625,10 +1625,11 @@ */ struct UnpackedRecord { KeyInfo *pKeyInfo; /* Collation and sort-order information */ u16 nField; /* Number of entries in apMem[] */ i8 default_rc; /* Comparison result if keys are equal */ + u8 isCorrupt; /* Corruption detected by xRecordCompare() */ Mem *aMem; /* Values */ int r1; /* Value to return if (lhs > rhs) */ int r2; /* Value to return if (rhs < lhs) */ }; Index: src/vdbe.h ================================================================== --- src/vdbe.h +++ src/vdbe.h @@ -209,14 +209,14 @@ #ifndef SQLITE_OMIT_TRACE char *sqlite3VdbeExpandSql(Vdbe*, const char*); #endif void sqlite3VdbeRecordUnpack(KeyInfo*,int,const void*,UnpackedRecord*); -int sqlite3VdbeRecordCompare(int,const void*,const UnpackedRecord*,int); +int sqlite3VdbeRecordCompare(int,const void*,UnpackedRecord*,int); UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(KeyInfo *, char *, int, char **); -typedef int (*RecordCompare)(int,const void*,const UnpackedRecord*,int); +typedef int (*RecordCompare)(int,const void*,UnpackedRecord*,int); RecordCompare sqlite3VdbeFindCompare(UnpackedRecord*); #ifndef SQLITE_OMIT_TRIGGER void sqlite3VdbeLinkSubProgram(Vdbe *, SubProgram *); #endif Index: src/vdbeInt.h ================================================================== --- src/vdbeInt.h +++ src/vdbeInt.h @@ -390,11 +390,11 @@ u32 sqlite3VdbeSerialPut(unsigned char*, Mem*, u32); u32 sqlite3VdbeSerialGet(const unsigned char*, u32, Mem*); void sqlite3VdbeDeleteAuxData(Vdbe*, int, int); int sqlite2BtreeKeyCompare(BtCursor *, const void *, int, int, int *); -int sqlite3VdbeIdxKeyCompare(VdbeCursor*,const UnpackedRecord*,int*); +int sqlite3VdbeIdxKeyCompare(VdbeCursor*,UnpackedRecord*,int*); int sqlite3VdbeIdxRowid(sqlite3*, BtCursor *, i64 *); int sqlite3MemCompare(const Mem*, const Mem*, const CollSeq*); int sqlite3VdbeExec(Vdbe*); int sqlite3VdbeList(Vdbe*); int sqlite3VdbeHalt(Vdbe*); Index: src/vdbeaux.c ================================================================== --- src/vdbeaux.c +++ src/vdbeaux.c @@ -3403,14 +3403,17 @@ ** determined that the first fields of the keys are equal. ** ** Key1 and Key2 do not have to contain the same number of fields. If all ** fields that appear in both keys are equal, then pPKey2->default_rc is ** returned. +** +** If database corruption is discovered, set pPKey2->isCorrupt to non-zero +** and return 0. */ int sqlite3VdbeRecordCompare( int nKey1, const void *pKey1, /* Left key */ - const UnpackedRecord *pPKey2, /* Right key */ + UnpackedRecord *pPKey2, /* Right key */ int bSkip /* If true, skip the first field */ ){ u32 d1; /* Offset into aKey[] of next data element */ int i; /* Index of next field to compare */ u32 szHdr1; /* Size of record header in bytes */ @@ -3432,11 +3435,14 @@ i = 1; pRhs++; }else{ idx1 = getVarint32(aKey1, szHdr1); d1 = szHdr1; - if( d1>(unsigned)nKey1 ) return 1; /* Corruption */ + if( d1>(unsigned)nKey1 ){ + pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ + } i = 0; } VVA_ONLY( mem1.zMalloc = 0; ) /* Only needed by assert() statements */ assert( pPKey2->pKeyInfo->nField+pPKey2->pKeyInfo->nXField>=pPKey2->nField @@ -3509,11 +3515,12 @@ }else{ mem1.n = (serial_type - 12) / 2; testcase( (d1+mem1.n)==(unsigned)nKey1 ); testcase( (d1+mem1.n+1)==(unsigned)nKey1 ); if( (d1+mem1.n) > (unsigned)nKey1 ){ - rc = 1; /* Corruption */ + pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ }else if( pKeyInfo->aColl[i] ){ mem1.enc = pKeyInfo->enc; mem1.db = pKeyInfo->db; mem1.flags = MEM_Str; mem1.z = (char*)&aKey1[d1]; @@ -3535,11 +3542,12 @@ }else{ int nStr = (serial_type - 12) / 2; testcase( (d1+nStr)==(unsigned)nKey1 ); testcase( (d1+nStr+1)==(unsigned)nKey1 ); if( (d1+nStr) > (unsigned)nKey1 ){ - rc = 1; /* Corruption */ + pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ }else{ int nCmp = MIN(nStr, pRhs->n); rc = memcmp(&aKey1[d1], pRhs->z, nCmp); if( rc==0 ) rc = nStr - pRhs->n; } @@ -3594,11 +3602,11 @@ ** To avoid concerns about buffer overreads, this routine is only used ** on schemas where the maximum valid header size is 63 bytes or less. */ static int vdbeRecordCompareInt( int nKey1, const void *pKey1, /* Left key */ - const UnpackedRecord *pPKey2, /* Right key */ + UnpackedRecord *pPKey2, /* Right key */ int bSkip /* Ignored */ ){ const u8 *aKey = &((const u8*)pKey1)[*(const u8*)pKey1 & 0x3F]; int serial_type = ((const u8*)pKey1)[1]; int res; @@ -3692,11 +3700,11 @@ ** uses the collation sequence BINARY and (c) that the size-of-header varint ** at the start of (pKey1/nKey1) fits in a single byte. */ static int vdbeRecordCompareString( int nKey1, const void *pKey1, /* Left key */ - const UnpackedRecord *pPKey2, /* Right key */ + UnpackedRecord *pPKey2, /* Right key */ int bSkip ){ const u8 *aKey1 = (const u8*)pKey1; int serial_type; int res; @@ -3713,11 +3721,14 @@ int nCmp; int nStr; int szHdr = aKey1[0]; nStr = (serial_type-12) / 2; - if( (szHdr + nStr) > nKey1 ) return 0; /* Corruption */ + if( (szHdr + nStr) > nKey1 ){ + pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ + } nCmp = MIN( pPKey2->aMem[0].n, nStr ); res = memcmp(&aKey1[szHdr], pPKey2->aMem[0].z, nCmp); if( res==0 ){ res = nStr - pPKey2->aMem[0].n; @@ -3878,11 +3889,11 @@ ** is ignored as well. Hence, this routine only compares the prefixes ** of the keys prior to the final rowid, not the entire key. */ int sqlite3VdbeIdxKeyCompare( VdbeCursor *pC, /* The cursor to compare against */ - const UnpackedRecord *pUnpacked, /* Unpacked version of key */ + UnpackedRecord *pUnpacked, /* Unpacked version of key */ int *res /* Write the comparison result here */ ){ i64 nCellKey = 0; int rc; BtCursor *pCur = pC->pCursor; Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -4326,22 +4326,38 @@ && sqlite3GlobalConfig.bUseCis && OptimizationEnabled(pWInfo->pParse->db, SQLITE_CoverIdxScan) ) ){ pNew->iSortIdx = b ? iSortIdx : 0; + /* TUNING: The base cost of an index scan is N + log2(N). + ** The log2(N) is for the initial seek to the beginning and the N + ** is for the scan itself. */ + pNew->rRun = sqlite3LogEstAdd(rSize, rLogSize); if( m==0 ){ /* TUNING: Cost of a covering index scan is K*(N + log2(N)). ** + The extra factor K of between 1.1 and 3.0 that depends ** on the relative sizes of the table and the index. K ** is smaller for smaller indices, thus favoring them. + ** The upper bound on K (3.0) matches the penalty factor + ** on a full table scan that tries to encourage the use of + ** indexed lookups over full scans. + */ + pNew->rRun += 1 + (15*pProbe->szIdxRow)/pTab->szTabRow; + }else{ + /* TUNING: The cost of scanning a non-covering index is multiplied + ** by log2(N) to account for the binary search of the main table + ** that must happen for each row of the index. + ** TODO: Should there be a multiplier here, analogous to the 3x + ** multiplier for a fulltable scan or covering index scan, to + ** further discourage the use of an index scan? Or is the log2(N) + ** term sufficient discouragement? + ** TODO: What if some or all of the WHERE clause terms can be + ** computed without reference to the original table. Then the + ** penality should reduce to logK where K is the number of output + ** rows. */ - pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 1 + - (15*pProbe->szIdxRow)/pTab->szTabRow; - }else{ - /* TUNING: Cost of scanning a non-covering index is (N+1)*log2(N) - ** which we will simplify to just N*log2(N) */ - pNew->rRun = rSize + rLogSize; + pNew->rRun += rLogSize; } whereLoopOutputAdjust(pWC, pNew); rc = whereLoopInsert(pBuilder, pNew); pNew->nOut = rSize; if( rc ) break; @@ -4918,11 +4934,11 @@ obSat |= MASKBIT(i); } } } } /* End the loop over all WhereLoops from outer-most down to inner-most */ - if( obSat==obDone ) return nOrderBy; + if( obSat==obDone ) return (i8)nOrderBy; if( !isOrderDistinct ){ for(i=nOrderBy-1; i>0; i--){ Bitmask m = MASKBIT(i) - 1; if( (obSat&m)==m ) return i; } @@ -5039,15 +5055,23 @@ if( isOrdered<0 ){ isOrdered = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags, iLoop, pWLoop, &revMask); if( isOrdered>=0 && isOrdered0 ); + rScale = sqlite3LogEst((nOrderBy-isOrdered)*100/nOrderBy) - 66; + rSortCost = nRowEst + estLog(nRowEst) + rScale; /* TUNING: The cost of implementing DISTINCT using a B-TREE is ** also N*log(N) but it has a larger constant of proportionality. ** Multiply by 3.0. */ if( pWInfo->wctrlFlags & WHERE_WANT_DISTINCT ){ rSortCost += 16; Index: test/corruptG.test ================================================================== --- test/corruptG.test +++ test/corruptG.test @@ -45,16 +45,16 @@ # Try to use the file. do_test 1.2 { catchsql { SELECT c FROM t1 WHERE a>'abc'; } -} {0 {}} +} {1 {database disk image is malformed}} do_test 1.3 { catchsql { PRAGMA integrity_check } -} {0 ok} +} {1 {database disk image is malformed}} do_test 1.4 { catchsql { SELECT c FROM t1 ORDER BY a; } } {1 {database disk image is malformed}} @@ -69,13 +69,8 @@ do_test 2.1 { catchsql { SELECT rowid FROM t1 WHERE a='abc' and b='xyz123456789XYZ'; } - # The following test result is brittle. The point above is to try to - # force a buffer overread by a corrupt database file. If we get an - # incorrect answer from a corrupt database file, that is OK. If the - # result below changes, that just means that "undefined behavior" has - # changed. -} {/0 .*/} +} {1 {database disk image is malformed}} finish_test Index: test/corruptI.test ================================================================== --- test/corruptI.test +++ test/corruptI.test @@ -49,11 +49,11 @@ set offset [hexio_get_int [hexio_read test.db [expr 2*1024 + 8] 2]] set off [expr 2*1024 + $offset + 1] hexio_write test.db $off FFFF7f02 sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE a = 10 } -} {0 {}} +} {1 {database disk image is malformed}} do_test 2.0 { execsql { CREATE TABLE r(x); INSERT INTO r VALUES('ABCDEFGHIJK'); Index: test/wal64k.test ================================================================== --- test/wal64k.test +++ test/wal64k.test @@ -16,10 +16,15 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix wal64k ifcapable !wal {finish_test ; return } + +if {$tcl_platform(platform) != "unix"} { + finish_test + return +} db close test_syscall pagesize 65536 sqlite3 db test.db @@ -42,6 +47,5 @@ integrity_check 1.3 db close test_syscall pagesize -1 finish_test - Index: tool/logest.c ================================================================== --- tool/logest.c +++ tool/logest.c @@ -15,17 +15,11 @@ ** ** Usage: ** ** ./LogEst ARGS ** -** Arguments: -** -** 'x' Multiple the top two elements of the stack -** '+' Add the top two elements of the stack -** NUM Convert NUM from integer to LogEst and push onto the stack -** ^NUM Interpret NUM as a LogEst and push onto stack. -** +** See the showHelp() routine for a description of valid arguments. ** Examples: ** ** To convert 123 from LogEst to integer: ** ** ./LogEst ^123 @@ -94,48 +88,80 @@ if( x<=2000000000.0 ) return logEstFromInteger((sqlite3_uint64)x); memcpy(&a, &x, 8); e = (a>>52) - 1022; return e*10; } + +int isInteger(const char *z){ + while( z[0]>='0' && z[0]<='9' ) z++; + return z[0]==0; +} int isFloat(const char *z){ - while( z[0] ){ - if( z[0]=='.' || z[0]=='E' || z[0]=='e' ) return 1; - z++; - } - return 0; + char c; + while( ((c=z[0])>='0' && c<='9') || c=='.' || c=='E' || c=='e' + || c=='+' || c=='-' ) z++; + return z[0]==0; +} + +static void showHelp(const char *zArgv0){ + printf("Usage: %s ARGS...\n", zArgv0); + printf("Arguments:\n" + " NUM Convert NUM from integer to LogEst and push onto the stack\n" + " ^NUM Interpret NUM as a LogEst and push onto stack\n" + " x Multiple the top two elements of the stack\n" + " + Add the top two elements of the stack\n" + " dup Dupliate the top element on the stack\n" + " inv Take the reciprocal of the top of stack. N = 1/N.\n" + " log Find the LogEst of the number on top of stack\n" + " nlogn Compute NlogN where N is the top of stack\n" + ); + exit(1); } int main(int argc, char **argv){ int i; int n = 0; LogEst a[100]; for(i=1; i=2 ){ a[n-2] = logEstAdd(a[n-2],a[n-1]); n--; } - }else if( z[0]=='x' ){ + }else if( strcmp(z,"x")==0 ){ if( n>=2 ){ a[n-2] = logEstMultiply(a[n-2],a[n-1]); n--; } + }else if( strcmp(z,"dup")==0 ){ + if( n>0 ){ + a[n] = a[n-1]; + n++; + } + }else if( strcmp(z,"log")==0 ){ + if( n>0 ) a[n-1] = logEstFromInteger(a[n-1]) - 33; + }else if( strcmp(z,"nlogn")==0 ){ + if( n>0 ) a[n-1] += logEstFromInteger(a[n-1]) - 33; + }else if( strcmp(z,"inv")==0 ){ + if( n>0 ) a[n-1] = -a[n-1]; }else if( z[0]=='^' ){ a[n++] = atoi(z+1); - }else if( isFloat(z) ){ + }else if( isInteger(z) ){ + a[n++] = logEstFromInteger(atoi(z)); + }else if( isFloat(z) && z[0]!='-' ){ a[n++] = logEstFromDouble(atof(z)); }else{ - a[n++] = logEstFromInteger(atoi(z)); + showHelp(argv[0]); } } for(i=n-1; i>=0; i--){ if( a[i]<0 ){ - printf("%d (%f)\n", a[i], 1.0/(double)logEstToInt(-a[i])); + printf("%5d (%f)\n", a[i], 1.0/(double)logEstToInt(-a[i])); }else{ sqlite3_uint64 x = logEstToInt(a[i]+100)*100/1024; - printf("%d (%lld.%02lld)\n", a[i], x/100, x%100); + printf("%5d (%lld.%02lld)\n", a[i], x/100, x%100); } } return 0; }