SQLite

Check-in [cb34cfe57c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Multiply all cursor step cost estimates by the estimated size of the row in bytes, in order to get the query planner ot make use of estimated row sizes. This check-in uses magic numbers in a few places (for example, estimates of the size of output rows) and needs lots of refinement. Consider this a proof-of-concept only.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | row-size-est
Files: files | file ages | folders
SHA1: cb34cfe57c2a664fbfae8106e95114400ea222d5
User & Date: drh 2013-10-07 17:32:15.338
Context
2013-10-08
18:40
Further refinement of the idea of multiplying run-time cost estimates by the estimated row size. (check-in: 18bd6ba96d user: drh tags: row-size-est)
2013-10-07
17:32
Multiply all cursor step cost estimates by the estimated size of the row in bytes, in order to get the query planner ot make use of estimated row sizes. This check-in uses magic numbers in a few places (for example, estimates of the size of output rows) and needs lots of refinement. Consider this a proof-of-concept only. (check-in: cb34cfe57c user: drh tags: row-size-est)
10:48
Merge bug fixes from trunk. (check-in: 1d7b2dc0ea user: drh tags: row-size-est)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/where.c.
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
#endif
    if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){
      /* Each row involves a step of the index, then a binary search of
      ** the main table */
      pNew->rRun =  sqlite3LogEstAdd(pNew->rRun,rLogSize>27 ? rLogSize-17 : 10);
    }
    /* Step cost for each output row */
    pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut);
    whereLoopOutputAdjust(pBuilder->pWC, pNew, pSrc->iCursor);
    rc = whereLoopInsert(pBuilder, pNew);
    if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0
     && pNew->u.btree.nEq<(pProbe->nColumn + (pProbe->zName!=0))
    ){
      whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nInMul+nIn);
    }







|







4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
#endif
    if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){
      /* Each row involves a step of the index, then a binary search of
      ** the main table */
      pNew->rRun =  sqlite3LogEstAdd(pNew->rRun,rLogSize>27 ? rLogSize-17 : 10);
    }
    /* Step cost for each output row */
    pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut) + pProbe->szIdxRow;
    whereLoopOutputAdjust(pBuilder->pWC, pNew, pSrc->iCursor);
    rc = whereLoopInsert(pBuilder, pNew);
    if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0
     && pNew->u.btree.nEq<(pProbe->nColumn + (pProbe->zName!=0))
    ){
      whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nInMul+nIn);
    }
4467
4468
4469
4470
4471
4472
4473

4474
4475
4476
4477
4478

4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
  WhereLoop *pNew;            /* Template WhereLoop object */
  int rc = SQLITE_OK;         /* Return code */
  int iSortIdx = 1;           /* Index number */
  int b;                      /* A boolean value */
  LogEst rSize;               /* number of rows in the table */
  LogEst rLogSize;            /* Logarithm of the number of rows in the table */
  WhereClause *pWC;           /* The parsed WHERE clause */

  
  pNew = pBuilder->pNew;
  pWInfo = pBuilder->pWInfo;
  pTabList = pWInfo->pTabList;
  pSrc = pTabList->a + pNew->iTab;

  pWC = pBuilder->pWC;
  assert( !IsVirtual(pSrc->pTab) );

  if( pSrc->pIndex ){
    /* An INDEXED BY clause specifies a particular index to use */
    pProbe = pSrc->pIndex;
  }else{
    /* There is no INDEXED BY clause.  Create a fake Index object in local
    ** variable sPk to represent the rowid primary key index.  Make this
    ** fake index the first in a chain of Index objects with all of the real
    ** indices to follow */
    Index *pFirst;                  /* First of real indices on the table */
    memset(&sPk, 0, sizeof(Index));
    sPk.nColumn = 1;
    sPk.aiColumn = &aiColumnPk;
    sPk.aiRowEst = aiRowEstPk;
    sPk.onError = OE_Replace;
    sPk.pTable = pSrc->pTab;
    aiRowEstPk[0] = pSrc->pTab->nRowEst;
    aiRowEstPk[1] = 1;
    pFirst = pSrc->pTab->pIndex;
    if( pSrc->notIndexed==0 ){
      /* The real indices of the table are only considered if the
      ** NOT INDEXED qualifier is omitted from the FROM clause */
      sPk.pNext = pFirst;
    }
    pProbe = &sPk;
  }
  rSize = sqlite3LogEst(pSrc->pTab->nRowEst);
  rLogSize = estLog(rSize);

#ifndef SQLITE_OMIT_AUTOMATIC_INDEX
  /* Automatic indexes */
  if( !pBuilder->pOrSet
   && (pWInfo->pParse->db->flags & SQLITE_AutoIndex)!=0
   && pSrc->pIndex==0







>





>

















|
|









|







4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
  WhereLoop *pNew;            /* Template WhereLoop object */
  int rc = SQLITE_OK;         /* Return code */
  int iSortIdx = 1;           /* Index number */
  int b;                      /* A boolean value */
  LogEst rSize;               /* number of rows in the table */
  LogEst rLogSize;            /* Logarithm of the number of rows in the table */
  WhereClause *pWC;           /* The parsed WHERE clause */
  Table *pTab;                /* Table being queried */
  
  pNew = pBuilder->pNew;
  pWInfo = pBuilder->pWInfo;
  pTabList = pWInfo->pTabList;
  pSrc = pTabList->a + pNew->iTab;
  pTab = pSrc->pTab;
  pWC = pBuilder->pWC;
  assert( !IsVirtual(pSrc->pTab) );

  if( pSrc->pIndex ){
    /* An INDEXED BY clause specifies a particular index to use */
    pProbe = pSrc->pIndex;
  }else{
    /* There is no INDEXED BY clause.  Create a fake Index object in local
    ** variable sPk to represent the rowid primary key index.  Make this
    ** fake index the first in a chain of Index objects with all of the real
    ** indices to follow */
    Index *pFirst;                  /* First of real indices on the table */
    memset(&sPk, 0, sizeof(Index));
    sPk.nColumn = 1;
    sPk.aiColumn = &aiColumnPk;
    sPk.aiRowEst = aiRowEstPk;
    sPk.onError = OE_Replace;
    sPk.pTable = pTab;
    aiRowEstPk[0] = pTab->nRowEst;
    aiRowEstPk[1] = 1;
    pFirst = pSrc->pTab->pIndex;
    if( pSrc->notIndexed==0 ){
      /* The real indices of the table are only considered if the
      ** NOT INDEXED qualifier is omitted from the FROM clause */
      sPk.pNext = pFirst;
    }
    pProbe = &sPk;
  }
  rSize = sqlite3LogEst(pTab->nRowEst);
  rLogSize = estLog(rSize);

#ifndef SQLITE_OMIT_AUTOMATIC_INDEX
  /* Automatic indexes */
  if( !pBuilder->pOrSet
   && (pWInfo->pParse->db->flags & SQLITE_AutoIndex)!=0
   && pSrc->pIndex==0
4526
4527
4528
4529
4530
4531
4532

4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
        pNew->u.btree.pIndex = 0;
        pNew->nLTerm = 1;
        pNew->aLTerm[0] = pTerm;
        /* TUNING: One-time cost for computing the automatic index is
        ** approximately 7*N*log2(N) where N is the number of rows in
        ** the table being indexed. */
        pNew->rSetup = rLogSize + rSize + 28;  assert( 28==sqlite3LogEst(7) );

        /* TUNING: Each index lookup yields 20 rows in the table.  This
        ** is more than the usual guess of 10 rows, since we have no way
        ** of knowning how selective the index will ultimately be.  It would
        ** not be unreasonable to make this value much larger. */
        pNew->nOut = 43;  assert( 43==sqlite3LogEst(20) );
        pNew->rRun = sqlite3LogEstAdd(rLogSize,pNew->nOut);
        pNew->wsFlags = WHERE_AUTO_INDEX;
        pNew->prereq = mExtra | pTerm->prereqRight;
        rc = whereLoopInsert(pBuilder, pNew);
      }
    }
  }
#endif /* SQLITE_OMIT_AUTOMATIC_INDEX */







>





|







4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
        pNew->u.btree.pIndex = 0;
        pNew->nLTerm = 1;
        pNew->aLTerm[0] = pTerm;
        /* TUNING: One-time cost for computing the automatic index is
        ** approximately 7*N*log2(N) where N is the number of rows in
        ** the table being indexed. */
        pNew->rSetup = rLogSize + rSize + 28;  assert( 28==sqlite3LogEst(7) );
        pNew->rSetup += pTab->szTabRow;
        /* TUNING: Each index lookup yields 20 rows in the table.  This
        ** is more than the usual guess of 10 rows, since we have no way
        ** of knowning how selective the index will ultimately be.  It would
        ** not be unreasonable to make this value much larger. */
        pNew->nOut = 43;  assert( 43==sqlite3LogEst(20) );
        pNew->rRun = sqlite3LogEstAdd(rLogSize,pNew->nOut) + pTab->szTabRow;
        pNew->wsFlags = WHERE_AUTO_INDEX;
        pNew->prereq = mExtra | pTerm->prereqRight;
        rc = whereLoopInsert(pBuilder, pNew);
      }
    }
  }
#endif /* SQLITE_OMIT_AUTOMATIC_INDEX */
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
      pNew->wsFlags = WHERE_IPK;

      /* Full table scan */
      pNew->iSortIdx = b ? iSortIdx : 0;
      /* TUNING: Cost of full table scan is 3*(N + log2(N)).
      **  +  The extra 3 factor is to encourage the use of indexed lookups
      **     over full scans.  FIXME */
      pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 16;
      whereLoopOutputAdjust(pWC, pNew, pSrc->iCursor);
      rc = whereLoopInsert(pBuilder, pNew);
      pNew->nOut = rSize;
      if( rc ) break;
    }else{
      Bitmask m = pSrc->colUsed & ~columnsInIndex(pProbe);
      pNew->wsFlags = (m==0) ? (WHERE_IDX_ONLY|WHERE_INDEXED) : WHERE_INDEXED;







|







4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
      pNew->wsFlags = WHERE_IPK;

      /* Full table scan */
      pNew->iSortIdx = b ? iSortIdx : 0;
      /* TUNING: Cost of full table scan is 3*(N + log2(N)).
      **  +  The extra 3 factor is to encourage the use of indexed lookups
      **     over full scans.  FIXME */
      pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 16 + pTab->szTabRow;
      whereLoopOutputAdjust(pWC, pNew, pSrc->iCursor);
      rc = whereLoopInsert(pBuilder, pNew);
      pNew->nOut = rSize;
      if( rc ) break;
    }else{
      Bitmask m = pSrc->colUsed & ~columnsInIndex(pProbe);
      pNew->wsFlags = (m==0) ? (WHERE_IDX_ONLY|WHERE_INDEXED) : WHERE_INDEXED;
4598
4599
4600
4601
4602
4603
4604

4605
4606
4607
4608
4609
4610
4611
          pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 10;
        }else{
          assert( b!=0 ); 
          /* TUNING: Cost of scanning a non-covering index is (N+1)*log2(N)
          ** which we will simplify to just N*log2(N) */
          pNew->rRun = rSize + rLogSize;
        }

        whereLoopOutputAdjust(pWC, pNew, pSrc->iCursor);
        rc = whereLoopInsert(pBuilder, pNew);
        pNew->nOut = rSize;
        if( rc ) break;
      }
    }








>







4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
          pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 10;
        }else{
          assert( b!=0 ); 
          /* TUNING: Cost of scanning a non-covering index is (N+1)*log2(N)
          ** which we will simplify to just N*log2(N) */
          pNew->rRun = rSize + rLogSize;
        }
        pNew->rRun += pProbe->szIdxRow;
        whereLoopOutputAdjust(pWC, pNew, pSrc->iCursor);
        rc = whereLoopInsert(pBuilder, pNew);
        pNew->nOut = rSize;
        if( rc ) break;
      }
    }

4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
      pNew->u.vtab.idxNum = pIdxInfo->idxNum;
      pNew->u.vtab.needFree = pIdxInfo->needToFreeIdxStr;
      pIdxInfo->needToFreeIdxStr = 0;
      pNew->u.vtab.idxStr = pIdxInfo->idxStr;
      pNew->u.vtab.isOrdered = (u8)((pIdxInfo->nOrderBy!=0)
                                     && pIdxInfo->orderByConsumed);
      pNew->rSetup = 0;
      pNew->rRun = sqlite3LogEstFromDouble(pIdxInfo->estimatedCost);
      /* TUNING: Every virtual table query returns 25 rows */
      pNew->nOut = 46;  assert( 46==sqlite3LogEst(25) );
      whereLoopInsert(pBuilder, pNew);
      if( pNew->u.vtab.needFree ){
        sqlite3_free(pNew->u.vtab.idxStr);
        pNew->u.vtab.needFree = 0;
      }







|







4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
      pNew->u.vtab.idxNum = pIdxInfo->idxNum;
      pNew->u.vtab.needFree = pIdxInfo->needToFreeIdxStr;
      pIdxInfo->needToFreeIdxStr = 0;
      pNew->u.vtab.idxStr = pIdxInfo->idxStr;
      pNew->u.vtab.isOrdered = (u8)((pIdxInfo->nOrderBy!=0)
                                     && pIdxInfo->orderByConsumed);
      pNew->rSetup = 0;
      pNew->rRun = sqlite3LogEstFromDouble(pIdxInfo->estimatedCost) + 55;
      /* TUNING: Every virtual table query returns 25 rows */
      pNew->nOut = 46;  assert( 46==sqlite3LogEst(25) );
      whereLoopInsert(pBuilder, pNew);
      if( pNew->u.vtab.needFree ){
        sqlite3_free(pNew->u.vtab.idxStr);
        pNew->u.vtab.needFree = 0;
      }
5260
5261
5262
5263
5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
5274
  ** to sqlite3WhereBegin() was concerned about sorting */
  rSortCost = 0;
  if( pWInfo->pOrderBy==0 || nRowEst==0 ){
    aFrom[0].isOrderedValid = 1;
  }else{
    /* TUNING: Estimated cost of sorting is N*log2(N) where N is the
    ** number of output rows. */
    rSortCost = nRowEst + estLog(nRowEst);
    WHERETRACE(0x002,("---- sort cost=%-3d\n", rSortCost));
  }

  /* Compute successively longer WherePaths using the previous generation
  ** of WherePaths as the basis for the next.  Keep track of the mxChoice
  ** best paths at each generation */
  for(iLoop=0; iLoop<nLoop; iLoop++){







|







5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
5274
5275
5276
5277
5278
  ** to sqlite3WhereBegin() was concerned about sorting */
  rSortCost = 0;
  if( pWInfo->pOrderBy==0 || nRowEst==0 ){
    aFrom[0].isOrderedValid = 1;
  }else{
    /* TUNING: Estimated cost of sorting is N*log2(N) where N is the
    ** number of output rows. */
    rSortCost = nRowEst + estLog(nRowEst) + 55;
    WHERETRACE(0x002,("---- sort cost=%-3d\n", rSortCost));
  }

  /* Compute successively longer WherePaths using the previous generation
  ** of WherePaths as the basis for the next.  Keep track of the mxChoice
  ** best paths at each generation */
  for(iLoop=0; iLoop<nLoop; iLoop++){