/ Check-in [d59f5809]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix another problem in stat4 sample selection.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: d59f580904e6e7e90fc0a692a3dd4eeff5942479
User & Date: dan 2013-09-03 14:43:12
Context
2013-09-03
19:26
Harden the STAT4 logic in where.c against OOM faults. check-in: 91d2cfbc user: drh tags: trunk
14:43
Fix another problem in stat4 sample selection. check-in: d59f5809 user: dan tags: trunk
14:03
Make sure the omit-noop-left-join optimization is not applied if columns of the LEFT JOIN are used in the ORDER BY clause. Ticket [be84e357c035] check-in: 0303d6bc user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

378
379
380
381
382
383
384
385





























386
387
388



389
390




391
392
393
394
395
396
397

398
399
400
401

402
403



404
405
406
407
408
409
410
...
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
...
437
438
439
440
441
442
443

444
445
446
447
448
449
450
451
452
...
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
...
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
...
557
558
559
560
561
562
563
564


565
566
567
568
569
570
571
...
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
  statInit,        /* xFunc */
  0,               /* xStep */
  0,               /* xFinalize */
  "stat_init",     /* zName */
  0,               /* pHash */
  0                /* pDestructor */
};






























#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
/*
** Return true if pNew is to be preferred over pOld.



*/
static int sampleIsBetter(Stat4Sample *pNew, Stat4Sample *pOld){




  tRowcnt nEqNew = pNew->anEq[pNew->iCol];
  tRowcnt nEqOld = pOld->anEq[pOld->iCol];

  assert( pOld->isPSample==0 && pNew->isPSample==0 );
  assert( IsStat4 || (pNew->iCol==0 && pOld->iCol==0) );

  if( (nEqNew>nEqOld)

   || (nEqNew==nEqOld && pNew->iCol<pOld->iCol)
   || (nEqNew==nEqOld && pNew->iCol==pOld->iCol && pNew->iHash>pOld->iHash)
  ){
    return 1;

  }
  return 0;



}

/*
** Copy the contents of object (*pFrom) into (*pTo).
*/
void sampleCopy(Stat4Accum *p, Stat4Sample *pTo, Stat4Sample *pFrom){
  pTo->iRowid = pFrom->iRowid;
................................................................................
/*
** Copy the contents of sample *pNew into the p->a[] array. If necessary,
** remove the least desirable sample from p->a[] to make room.
*/
static void sampleInsert(Stat4Accum *p, Stat4Sample *pNew, int nEqZero){
  Stat4Sample *pSample;
  int i;
  i64 iSeq;
  int iPos;

  assert( IsStat4 || nEqZero==0 );

  if( pNew->isPSample==0 ){
    Stat4Sample *pUpgrade = 0;
    assert( pNew->anEq[pNew->iCol]>0 );

................................................................................
    ** added a sample that shares this prefix, there is no need to add
    ** this one. Instead, upgrade the priority of the highest priority
    ** existing sample that shares this prefix.  */
    for(i=p->nSample-1; i>=0; i--){
      Stat4Sample *pOld = &p->a[i];
      if( pOld->anEq[pNew->iCol]==0 ){
        if( pOld->isPSample ) return;

        assert( sampleIsBetter(pNew, pOld) );
        if( pUpgrade==0 || sampleIsBetter(pOld, pUpgrade) ){
          pUpgrade = pOld;
        }
      }
    }
    if( pUpgrade ){
      pUpgrade->iCol = pNew->iCol;
      pUpgrade->anEq[pUpgrade->iCol] = pNew->anEq[pUpgrade->iCol];
................................................................................
       || pNew->anLt[p->nCol-1] > p->a[p->nSample-1].anLt[p->nCol-1] );
#endif

  /* Insert the new sample */
  pSample = &p->a[p->nSample];
  sampleCopy(p, pSample, pNew);
  p->nSample++;

#if 0
  iSeq = pNew->anLt[p->nCol-1];
  for(iPos=p->nSample; iPos>0; iPos--){
    if( iSeq>p->a[iPos-1].anLt[p->nCol-1] ) break;
  }

  if( iPos!=p->nSample ){
    Stat4Sample *pEnd = &p->a[p->nSample];
    tRowcnt *anEq = pEnd->anEq;
    tRowcnt *anLt = pEnd->anLt;
    tRowcnt *anDLt = pEnd->anDLt;
    memmove(&p->a[iPos], &p->a[iPos+1], (p->nSample-iPos)*sizeof(p->a[0]));
    pSample->anEq = anEq;
    pSample->anDLt = anDLt;
    pSample->anLt = anLt;
  }
#endif


  /* Zero the first nEqZero entries in the anEq[] array. */
  memset(pSample->anEq, 0, sizeof(tRowcnt)*nEqZero);

 find_new_min:
  if( p->nSample>=p->mxSample ){
    int iMin = -1;
    for(i=0; i<p->mxSample; i++){
      if( p->a[i].isPSample ) continue;
      if( iMin<0 || sampleIsBetter(&p->a[iMin], &p->a[i]) ){
        iMin = i;
      }
    }
    assert( iMin>=0 );
    p->iMin = iMin;
  }
}
................................................................................
#ifdef SQLITE_ENABLE_STAT4
  int i;

  /* Check if any samples from the aBest[] array should be pushed
  ** into IndexSample.a[] at this point.  */
  for(i=(p->nCol-2); i>=iChng; i--){
    Stat4Sample *pBest = &p->aBest[i];
    if( p->nSample<p->mxSample
     || sampleIsBetter(pBest, &p->a[p->iMin])
    ){
      sampleInsert(p, pBest, i);
    }
  }

  /* Update the anEq[] fields of any samples already collected. */
  for(i=p->nSample-1; i>=0; i--){
    int j;
................................................................................
    if( (nLt/p->nPSample)!=(nLt+nEq)/p->nPSample ){
      p->current.isPSample = 1;
      sampleInsert(p, &p->current, 0);
      p->current.isPSample = 0;
    }else 

    /* Or if it is a non-periodic sample. Add it in this case too. */
    if( p->nSample<p->mxSample || sampleIsBetter(&p->current, &p->a[p->iMin]) ){


      sampleInsert(p, &p->current, 0);
    }
  }
#endif
}

/*
................................................................................
      sampleInsert(p, &p->current, p->nCol-1);
      p->current.isPSample = 0;
    }

    /* Update the aBest[] array. */
    for(i=0; i<(p->nCol-1); i++){
      p->current.iCol = i;
      if( i>=iChng || sampleIsBetter(&p->current, &p->aBest[i]) ){
        sampleCopy(p, &p->aBest[i], &p->current);
      }
    }
  }
#endif
}
static const FuncDef statPushFuncdef = {








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



>
>
>

|
>
>
>
>






|
>
|
<
<
|
>


>
>
>







 







<
<







 







>
|
|







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<









|







 







|
|
<







 







|
>
>







 







|







378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435


436
437
438
439
440
441
442
443
444
445
446
447
448
449
...
458
459
460
461
462
463
464


465
466
467
468
469
470
471
...
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
...
514
515
516
517
518
519
520



















521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
...
547
548
549
550
551
552
553
554
555

556
557
558
559
560
561
562
...
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
...
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
  statInit,        /* xFunc */
  0,               /* xStep */
  0,               /* xFinalize */
  "stat_init",     /* zName */
  0,               /* pHash */
  0                /* pDestructor */
};

#ifdef SQLITE_ENABLE_STAT4
/*
** pNew and pOld are both candidate non-periodic samples selected for 
** the same column (pNew->iCol==pOld->iCol). Ignoring this column and 
** considering only any trailing columns and the sample hash value, this
** function returns true if sample pNew is to be preferred over pOld.
** In other words, if we assume that the cardinalities of the selected
** column for pNew and pOld are equal, is pNew to be preferred over pOld.
**
** This function assumes that for each argument sample, the contents of
** the anEq[] array from pSample->anEq[pSample->iCol+1] onwards are valid. 
*/
static int sampleIsBetterPost(
  Stat4Accum *pAccum, 
  Stat4Sample *pNew, 
  Stat4Sample *pOld
){
  int nCol = pAccum->nCol;
  int i;
  assert( pNew->iCol==pOld->iCol );
  for(i=pNew->iCol+1; i<nCol; i++){
    if( pNew->anEq[i]>pOld->anEq[i] ) return 1;
    if( pNew->anEq[i]<pOld->anEq[i] ) return 0;
  }
  if( pNew->iHash>pOld->iHash ) return 1;
  return 0;
}
#endif

#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
/*
** Return true if pNew is to be preferred over pOld.
**
** This function assumes that for each argument sample, the contents of
** the anEq[] array from pSample->anEq[pSample->iCol] onwards are valid. 
*/
static int sampleIsBetter(
  Stat4Accum *pAccum, 
  Stat4Sample *pNew, 
  Stat4Sample *pOld
){
  tRowcnt nEqNew = pNew->anEq[pNew->iCol];
  tRowcnt nEqOld = pOld->anEq[pOld->iCol];

  assert( pOld->isPSample==0 && pNew->isPSample==0 );
  assert( IsStat4 || (pNew->iCol==0 && pOld->iCol==0) );

  if( (nEqNew>nEqOld) ) return 1;
#ifdef SQLITE_ENABLE_STAT4
  if( nEqNew==nEqOld ){


    if( pNew->iCol<pOld->iCol ) return 1;
    return (pNew->iCol==pOld->iCol && sampleIsBetterPost(pAccum, pNew, pOld));
  }
  return 0;
#else
  return (nEqNew==nEqOld && pNew->iHash>pOld->iHash);
#endif
}

/*
** Copy the contents of object (*pFrom) into (*pTo).
*/
void sampleCopy(Stat4Accum *p, Stat4Sample *pTo, Stat4Sample *pFrom){
  pTo->iRowid = pFrom->iRowid;
................................................................................
/*
** Copy the contents of sample *pNew into the p->a[] array. If necessary,
** remove the least desirable sample from p->a[] to make room.
*/
static void sampleInsert(Stat4Accum *p, Stat4Sample *pNew, int nEqZero){
  Stat4Sample *pSample;
  int i;



  assert( IsStat4 || nEqZero==0 );

  if( pNew->isPSample==0 ){
    Stat4Sample *pUpgrade = 0;
    assert( pNew->anEq[pNew->iCol]>0 );

................................................................................
    ** added a sample that shares this prefix, there is no need to add
    ** this one. Instead, upgrade the priority of the highest priority
    ** existing sample that shares this prefix.  */
    for(i=p->nSample-1; i>=0; i--){
      Stat4Sample *pOld = &p->a[i];
      if( pOld->anEq[pNew->iCol]==0 ){
        if( pOld->isPSample ) return;
        assert( pOld->iCol>pNew->iCol );
        assert( sampleIsBetter(p, pNew, pOld) );
        if( pUpgrade==0 || sampleIsBetter(p, pOld, pUpgrade) ){
          pUpgrade = pOld;
        }
      }
    }
    if( pUpgrade ){
      pUpgrade->iCol = pNew->iCol;
      pUpgrade->anEq[pUpgrade->iCol] = pNew->anEq[pUpgrade->iCol];
................................................................................
       || pNew->anLt[p->nCol-1] > p->a[p->nSample-1].anLt[p->nCol-1] );
#endif

  /* Insert the new sample */
  pSample = &p->a[p->nSample];
  sampleCopy(p, pSample, pNew);
  p->nSample++;




















  /* Zero the first nEqZero entries in the anEq[] array. */
  memset(pSample->anEq, 0, sizeof(tRowcnt)*nEqZero);

 find_new_min:
  if( p->nSample>=p->mxSample ){
    int iMin = -1;
    for(i=0; i<p->mxSample; i++){
      if( p->a[i].isPSample ) continue;
      if( iMin<0 || sampleIsBetter(p, &p->a[iMin], &p->a[i]) ){
        iMin = i;
      }
    }
    assert( iMin>=0 );
    p->iMin = iMin;
  }
}
................................................................................
#ifdef SQLITE_ENABLE_STAT4
  int i;

  /* Check if any samples from the aBest[] array should be pushed
  ** into IndexSample.a[] at this point.  */
  for(i=(p->nCol-2); i>=iChng; i--){
    Stat4Sample *pBest = &p->aBest[i];
    pBest->anEq[i] = p->current.anEq[i];
    if( p->nSample<p->mxSample || sampleIsBetter(p, pBest, &p->a[p->iMin]) ){

      sampleInsert(p, pBest, i);
    }
  }

  /* Update the anEq[] fields of any samples already collected. */
  for(i=p->nSample-1; i>=0; i--){
    int j;
................................................................................
    if( (nLt/p->nPSample)!=(nLt+nEq)/p->nPSample ){
      p->current.isPSample = 1;
      sampleInsert(p, &p->current, 0);
      p->current.isPSample = 0;
    }else 

    /* Or if it is a non-periodic sample. Add it in this case too. */
    if( p->nSample<p->mxSample 
     || sampleIsBetter(p, &p->current, &p->a[p->iMin]) 
    ){
      sampleInsert(p, &p->current, 0);
    }
  }
#endif
}

/*
................................................................................
      sampleInsert(p, &p->current, p->nCol-1);
      p->current.isPSample = 0;
    }

    /* Update the aBest[] array. */
    for(i=0; i<(p->nCol-1); i++){
      p->current.iCol = i;
      if( i>=iChng || sampleIsBetterPost(p, &p->current, &p->aBest[i]) ){
        sampleCopy(p, &p->aBest[i], &p->current);
      }
    }
  }
#endif
}
static const FuncDef statPushFuncdef = {

Changes to test/analyze9.test.

240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
...
911
912
913
914
915
916
917
918

































919
920
    neq,
    lrange(nlt, 0, 2),
    lrange(ndlt, 0, 2),
    lrange(test_decode(sample), 0, 1)
    FROM sqlite_stat4
  ORDER BY rowid DESC LIMIT 2;
} {
  {2 1 1 1} {295 296 296} {120 122 125} {201 4} 
  {5 3 1 1} {290 290 292} {119 119 121} {200 1}
}

do_execsql_test 4.4 { SELECT count(DISTINCT c) FROM t1 WHERE c<201 } 120
do_execsql_test 4.5 { SELECT count(DISTINCT c) FROM t1 WHERE c<200 } 119

# Check that the perioidic samples are present.
................................................................................
    execsql {
      CREATE TABLE t1(x, y);
      CREATE VIEW v1 AS SELECT * FROM t1;
    }
    catchsql ANALYZE
  } {1 {not authorized}}
}


































finish_test








|







 








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
...
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
    neq,
    lrange(nlt, 0, 2),
    lrange(ndlt, 0, 2),
    lrange(test_decode(sample), 0, 1)
    FROM sqlite_stat4
  ORDER BY rowid DESC LIMIT 2;
} {
  {2 1 1 1} {295 295 295} {120 121 124} {201 3} 
  {5 3 1 1} {290 290 292} {119 119 121} {200 1}
}

do_execsql_test 4.4 { SELECT count(DISTINCT c) FROM t1 WHERE c<201 } 120
do_execsql_test 4.5 { SELECT count(DISTINCT c) FROM t1 WHERE c<200 } 119

# Check that the perioidic samples are present.
................................................................................
    execsql {
      CREATE TABLE t1(x, y);
      CREATE VIEW v1 AS SELECT * FROM t1;
    }
    catchsql ANALYZE
  } {1 {not authorized}}
}

#-------------------------------------------------------------------------
#
reset_db
proc r {args} { expr rand() }
db func r r
db func lrange lrange
do_test 20.1 {
  execsql {
    CREATE TABLE t1(a,b,c,d);
    CREATE INDEX i1 ON t1(a,b,c,d);
  }
  for {set i 0} {$i < 16} {incr i} {
    execsql {
      INSERT INTO t1 VALUES($i, r(), r(), r());
      INSERT INTO t1 VALUES($i, $i,  r(), r());
      INSERT INTO t1 VALUES($i, $i,  $i,  r());
      INSERT INTO t1 VALUES($i, $i,  $i,  $i);
      INSERT INTO t1 VALUES($i, $i,  $i,  $i);
      INSERT INTO t1 VALUES($i, $i,  $i,  r());
      INSERT INTO t1 VALUES($i, $i,  r(), r());
      INSERT INTO t1 VALUES($i, r(), r(), r());
    }
  }
} {}
do_execsql_test 20.2 { ANALYZE }
for {set i 0} {$i<16} {incr i} {
    set val "$i $i $i $i"
    do_execsql_test 20.3.$i {
      SELECT count(*) FROM sqlite_stat4 
      WHERE lrange(test_decode(sample), 0, 3)=$val
    } {1}
}

finish_test