/ Check-in [c3939d24]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Avoid moving pages more than once in an incremental vacuum operation.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | incr-vacuum-opt
Files: files | file ages | folders
SHA1: c3939d249119b47bd57baa11a5ed7cc6014fc795
User & Date: dan 2013-02-22 20:16:34
References
2013-02-23
17:49
Fix off-by-one bug in [c3939d2491] uncovered by th3. check-in: 66f9faa9 user: dan tags: incr-vacuum-opt
Context
2013-02-22
20:57
Fix a problem with the previous commit. check-in: 720a3cea user: dan tags: incr-vacuum-opt
20:16
Avoid moving pages more than once in an incremental vacuum operation. check-in: c3939d24 user: dan tags: incr-vacuum-opt
2013-02-20
00:54
On Minix, disable the ".timer" command in the shell in order to avoid calling getrusage(). check-in: 9bd9bd9c user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/btree.c.

  2905   2905       }
  2906   2906     }
  2907   2907     return rc;
  2908   2908   }
  2909   2909   
  2910   2910   /* Forward declaration required by incrVacuumStep(). */
  2911   2911   static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8);
  2912         -
  2913         -/*
  2914         -** Perform a single step of an incremental-vacuum. If successful,
  2915         -** return SQLITE_OK. If there is no work to do (and therefore no
  2916         -** point in calling this function again), return SQLITE_DONE.
  2917         -**
  2918         -** More specificly, this function attempts to re-organize the 
  2919         -** database so that the last page of the file currently in use
  2920         -** is no longer in use.
  2921         -**
  2922         -** If the nFin parameter is non-zero, this function assumes
  2923         -** that the caller will keep calling incrVacuumStep() until
  2924         -** it returns SQLITE_DONE or an error, and that nFin is the
  2925         -** number of pages the database file will contain after this 
  2926         -** process is complete.  If nFin is zero, it is assumed that
  2927         -** incrVacuumStep() will be called a finite amount of times
  2928         -** which may or may not empty the freelist.  A full autovacuum
  2929         -** has nFin>0.  A "PRAGMA incremental_vacuum" has nFin==0.
  2930         -*/
  2931         -static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg){
         2912  +#define BTALLOC_ANY   0           /* Allocate any page */
         2913  +#define BTALLOC_EXACT 1           /* Allocate exact page if possible */
         2914  +#define BTALLOC_LE    2           /* Allocate any page <= the parameter */
         2915  +
         2916  +/*
         2917  +** Perform a single step of an incremental-vacuum. If successful, return
         2918  +** SQLITE_OK. If there is no work to do (and therefore no point in 
         2919  +** calling this function again), return SQLITE_DONE. Or, if an error 
         2920  +** occurs, return some other error code.
         2921  +**
         2922  +** More specificly, this function attempts to re-organize the database so 
         2923  +** that the last page of the file currently in use is no longer in use.
         2924  +**
         2925  +** Parameter nFin is the number of pages that this database would contain
         2926  +** were this function called until it returns SQLITE_DONE.
         2927  +**
         2928  +** If the bCommit parameter is non-zero, this function assumes that the 
         2929  +** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE 
         2930  +** or an error. bCommit is passed true for an auto-vacuum-on-commmit 
         2931  +** operation, or false for an incremental vacuum.
         2932  +*/
         2933  +static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){
  2932   2934     Pgno nFreeList;           /* Number of pages still on the free-list */
  2933   2935     int rc;
  2934   2936   
  2935   2937     assert( sqlite3_mutex_held(pBt->mutex) );
  2936   2938     assert( iLastPg>nFin );
  2937   2939   
  2938   2940     if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){
................................................................................
  2949   2951         return rc;
  2950   2952       }
  2951   2953       if( eType==PTRMAP_ROOTPAGE ){
  2952   2954         return SQLITE_CORRUPT_BKPT;
  2953   2955       }
  2954   2956   
  2955   2957       if( eType==PTRMAP_FREEPAGE ){
  2956         -      if( nFin==0 ){
         2958  +      if( bCommit==0 ){
  2957   2959           /* Remove the page from the files free-list. This is not required
  2958         -        ** if nFin is non-zero. In that case, the free-list will be
         2960  +        ** if bCommit is non-zero. In that case, the free-list will be
  2959   2961           ** truncated to zero after this function returns, so it doesn't 
  2960   2962           ** matter if it still contains some garbage entries.
  2961   2963           */
  2962   2964           Pgno iFreePg;
  2963   2965           MemPage *pFreePg;
  2964         -        rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, 1);
         2966  +        rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, BTALLOC_EXACT);
  2965   2967           if( rc!=SQLITE_OK ){
  2966   2968             return rc;
  2967   2969           }
  2968   2970           assert( iFreePg==iLastPg );
  2969   2971           releasePage(pFreePg);
  2970   2972         }
  2971   2973       } else {
  2972   2974         Pgno iFreePg;             /* Index of free page to move pLastPg to */
  2973   2975         MemPage *pLastPg;
         2976  +      u8 eMode = BTALLOC_ANY;   /* Mode parameter for allocateBtreePage() */
         2977  +      Pgno iNear = 0;           /* nearby parameter for allocateBtreePage() */
  2974   2978   
  2975   2979         rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0);
  2976   2980         if( rc!=SQLITE_OK ){
  2977   2981           return rc;
  2978   2982         }
  2979   2983   
  2980         -      /* If nFin is zero, this loop runs exactly once and page pLastPg
         2984  +      /* If bCommit is zero, this loop runs exactly once and page pLastPg
  2981   2985         ** is swapped with the first free page pulled off the free list.
  2982   2986         **
  2983         -      ** On the other hand, if nFin is greater than zero, then keep
         2987  +      ** On the other hand, if bCommit is greater than zero, then keep
  2984   2988         ** looping until a free-page located within the first nFin pages
  2985   2989         ** of the file is found.
  2986   2990         */
         2991  +      if( bCommit==0 ){
         2992  +        eMode = BTALLOC_LE;
         2993  +        iNear = nFin;
         2994  +      }
  2987   2995         do {
  2988   2996           MemPage *pFreePg;
  2989         -        rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, 0, 0);
         2997  +        rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iNear, eMode);
  2990   2998           if( rc!=SQLITE_OK ){
  2991   2999             releasePage(pLastPg);
  2992   3000             return rc;
  2993   3001           }
  2994   3002           releasePage(pFreePg);
  2995         -      }while( nFin!=0 && iFreePg>nFin );
         3003  +      }while( bCommit && iFreePg>nFin );
  2996   3004         assert( iFreePg<iLastPg );
  2997   3005         
  2998   3006         rc = sqlite3PagerWrite(pLastPg->pDbPage);
  2999   3007         if( rc==SQLITE_OK ){
  3000   3008           rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, nFin!=0);
  3001   3009         }
  3002   3010         releasePage(pLastPg);
  3003   3011         if( rc!=SQLITE_OK ){
  3004   3012           return rc;
  3005   3013         }
  3006   3014       }
  3007   3015     }
  3008   3016   
  3009         -  if( nFin==0 ){
         3017  +  if( bCommit==0 ){
  3010   3018       iLastPg--;
  3011   3019       while( iLastPg==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, iLastPg) ){
  3012   3020         if( PTRMAP_ISPAGE(pBt, iLastPg) ){
  3013   3021           MemPage *pPg;
  3014   3022           rc = btreeGetPage(pBt, iLastPg, &pPg, 0);
  3015   3023           if( rc!=SQLITE_OK ){
  3016   3024             return rc;
................................................................................
  3024   3032         iLastPg--;
  3025   3033       }
  3026   3034       sqlite3PagerTruncateImage(pBt->pPager, iLastPg);
  3027   3035       pBt->nPage = iLastPg;
  3028   3036     }
  3029   3037     return SQLITE_OK;
  3030   3038   }
         3039  +
         3040  +/*
         3041  +** The database opened by the first argument is an auto-vacuum database
         3042  +** nOrig pages in size containing nFree free pages. Return the expected 
         3043  +** size of the database in pages following an auto-vacuum operation.
         3044  +*/
         3045  +static Pgno finalDbSize(BtShared *pBt, Pgno nOrig, Pgno nFree){
         3046  +  int nEntry;                     /* Number of entries on one ptrmap page */
         3047  +  Pgno nPtrmap;                   /* Number of PtrMap pages to be freed */
         3048  +  Pgno nFin;                      /* Return value */
         3049  +
         3050  +  nEntry = pBt->usableSize/5;
         3051  +  nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry;
         3052  +  nFin = nOrig - nFree - nPtrmap;
         3053  +  if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<PENDING_BYTE_PAGE(pBt) ){
         3054  +    nFin--;
         3055  +  }
         3056  +  while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){
         3057  +    nFin--;
         3058  +  }
         3059  +  if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT;
         3060  +
         3061  +  return nFin;
         3062  +}
  3031   3063   
  3032   3064   /*
  3033   3065   ** A write-transaction must be opened before calling this function.
  3034   3066   ** It performs a single unit of work towards an incremental vacuum.
  3035   3067   **
  3036   3068   ** If the incremental vacuum is finished after this function has run,
  3037   3069   ** SQLITE_DONE is returned. If it is not finished, but no error occurred,
................................................................................
  3042   3074     BtShared *pBt = p->pBt;
  3043   3075   
  3044   3076     sqlite3BtreeEnter(p);
  3045   3077     assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE );
  3046   3078     if( !pBt->autoVacuum ){
  3047   3079       rc = SQLITE_DONE;
  3048   3080     }else{
  3049         -    invalidateAllOverflowCache(pBt);
  3050         -    rc = incrVacuumStep(pBt, 0, btreePagecount(pBt));
  3051         -    if( rc==SQLITE_OK ){
  3052         -      rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
  3053         -      put4byte(&pBt->pPage1->aData[28], pBt->nPage);
         3081  +    Pgno nOrig = btreePagecount(pBt);
         3082  +    Pgno nFree = get4byte(&pBt->pPage1->aData[36]);
         3083  +    Pgno nFin = finalDbSize(pBt, nOrig, nFree);
         3084  +
         3085  +    if( nFin<nOrig ){
         3086  +      invalidateAllOverflowCache(pBt);
         3087  +      rc = incrVacuumStep(pBt, nFin, nOrig, 0);
         3088  +      if( rc==SQLITE_OK ){
         3089  +        rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
         3090  +        put4byte(&pBt->pPage1->aData[28], pBt->nPage);
         3091  +      }
         3092  +    }else{
         3093  +      rc = SQLITE_DONE;
  3054   3094       }
  3055   3095     }
  3056   3096     sqlite3BtreeLeave(p);
  3057   3097     return rc;
  3058   3098   }
  3059   3099   
  3060   3100   /*
................................................................................
  3073   3113   
  3074   3114     assert( sqlite3_mutex_held(pBt->mutex) );
  3075   3115     invalidateAllOverflowCache(pBt);
  3076   3116     assert(pBt->autoVacuum);
  3077   3117     if( !pBt->incrVacuum ){
  3078   3118       Pgno nFin;         /* Number of pages in database after autovacuuming */
  3079   3119       Pgno nFree;        /* Number of pages on the freelist initially */
  3080         -    Pgno nPtrmap;      /* Number of PtrMap pages to be freed */
  3081   3120       Pgno iFree;        /* The next page to be freed */
  3082         -    int nEntry;        /* Number of entries on one ptrmap page */
  3083   3121       Pgno nOrig;        /* Database size before freeing */
  3084   3122   
  3085   3123       nOrig = btreePagecount(pBt);
  3086   3124       if( PTRMAP_ISPAGE(pBt, nOrig) || nOrig==PENDING_BYTE_PAGE(pBt) ){
  3087   3125         /* It is not possible to create a database for which the final page
  3088   3126         ** is either a pointer-map page or the pending-byte page. If one
  3089   3127         ** is encountered, this indicates corruption.
  3090   3128         */
  3091   3129         return SQLITE_CORRUPT_BKPT;
  3092   3130       }
  3093   3131   
  3094   3132       nFree = get4byte(&pBt->pPage1->aData[36]);
  3095         -    nEntry = pBt->usableSize/5;
  3096         -    nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry;
  3097         -    nFin = nOrig - nFree - nPtrmap;
  3098         -    if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<PENDING_BYTE_PAGE(pBt) ){
  3099         -      nFin--;
  3100         -    }
  3101         -    while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){
  3102         -      nFin--;
  3103         -    }
         3133  +    nFin = finalDbSize(pBt, nOrig, nFree);
  3104   3134       if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT;
  3105   3135   
  3106   3136       for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){
  3107         -      rc = incrVacuumStep(pBt, nFin, iFree);
         3137  +      rc = incrVacuumStep(pBt, nFin, iFree, 1);
  3108   3138       }
  3109   3139       if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){
  3110   3140         rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
  3111   3141         put4byte(&pBt->pPage1->aData[32], 0);
  3112   3142         put4byte(&pBt->pPage1->aData[36], 0);
  3113   3143         put4byte(&pBt->pPage1->aData[28], nFin);
  3114   3144         sqlite3PagerTruncateImage(pBt->pPager, nFin);
................................................................................
  4863   4893   ** is only used by auto-vacuum databases when allocating a new table.
  4864   4894   */
  4865   4895   static int allocateBtreePage(
  4866   4896     BtShared *pBt, 
  4867   4897     MemPage **ppPage, 
  4868   4898     Pgno *pPgno, 
  4869   4899     Pgno nearby,
  4870         -  u8 exact
         4900  +  u8 eMode
  4871   4901   ){
  4872   4902     MemPage *pPage1;
  4873   4903     int rc;
  4874   4904     u32 n;     /* Number of pages on the freelist */
  4875   4905     u32 k;     /* Number of leaves on the trunk of the freelist */
  4876   4906     MemPage *pTrunk = 0;
  4877   4907     MemPage *pPrevTrunk = 0;
................................................................................
  4891   4921       u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
  4892   4922       
  4893   4923       /* If the 'exact' parameter was true and a query of the pointer-map
  4894   4924       ** shows that the page 'nearby' is somewhere on the free-list, then
  4895   4925       ** the entire-list will be searched for that page.
  4896   4926       */
  4897   4927   #ifndef SQLITE_OMIT_AUTOVACUUM
  4898         -    if( exact && nearby<=mxPage ){
  4899         -      u8 eType;
  4900         -      assert( nearby>0 );
  4901         -      assert( pBt->autoVacuum );
  4902         -      rc = ptrmapGet(pBt, nearby, &eType, 0);
  4903         -      if( rc ) return rc;
  4904         -      if( eType==PTRMAP_FREEPAGE ){
  4905         -        searchList = 1;
         4928  +    if( eMode==BTALLOC_EXACT ){
         4929  +      if( nearby<=mxPage ){
         4930  +        u8 eType;
         4931  +        assert( nearby>0 );
         4932  +        assert( pBt->autoVacuum );
         4933  +        rc = ptrmapGet(pBt, nearby, &eType, 0);
         4934  +        if( rc ) return rc;
         4935  +        if( eType==PTRMAP_FREEPAGE ){
         4936  +          searchList = 1;
         4937  +        }
  4906   4938         }
  4907         -      *pPgno = nearby;
         4939  +    }else if( eMode==BTALLOC_LE ){
         4940  +      searchList = 1;
  4908   4941       }
  4909   4942   #endif
  4910   4943   
  4911   4944       /* Decrement the free-list count by 1. Set iTrunk to the index of the
  4912   4945       ** first free-list trunk page. iPrevTrunk is initially 1.
  4913   4946       */
  4914   4947       rc = sqlite3PagerWrite(pPage1->pDbPage);
................................................................................
  4955   4988           pTrunk = 0;
  4956   4989           TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
  4957   4990         }else if( k>(u32)(pBt->usableSize/4 - 2) ){
  4958   4991           /* Value of k is out of range.  Database corruption */
  4959   4992           rc = SQLITE_CORRUPT_BKPT;
  4960   4993           goto end_allocate_page;
  4961   4994   #ifndef SQLITE_OMIT_AUTOVACUUM
  4962         -      }else if( searchList && nearby==iTrunk ){
         4995  +      }else if( searchList 
         4996  +            && (nearby==iTrunk || (iTrunk<nearby && eMode==BTALLOC_LE)) 
         4997  +      ){
  4963   4998           /* The list is being searched and this trunk page is the page
  4964   4999           ** to allocate, regardless of whether it has leaves.
  4965   5000           */
  4966         -        assert( *pPgno==iTrunk );
         5001  +        *pPgno = iTrunk;
  4967   5002           *ppPage = pTrunk;
  4968   5003           searchList = 0;
  4969   5004           rc = sqlite3PagerWrite(pTrunk->pDbPage);
  4970   5005           if( rc ){
  4971   5006             goto end_allocate_page;
  4972   5007           }
  4973   5008           if( k==0 ){
................................................................................
  5043   5078           iPage = get4byte(&aData[8+closest*4]);
  5044   5079           testcase( iPage==mxPage );
  5045   5080           if( iPage>mxPage ){
  5046   5081             rc = SQLITE_CORRUPT_BKPT;
  5047   5082             goto end_allocate_page;
  5048   5083           }
  5049   5084           testcase( iPage==mxPage );
  5050         -        if( !searchList || iPage==nearby ){
         5085  +        if( !searchList 
         5086  +         || (iPage==nearby || (iPage<nearby && eMode==BTALLOC_LE)) 
         5087  +        ){
  5051   5088             int noContent;
  5052   5089             *pPgno = iPage;
  5053   5090             TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
  5054   5091                    ": %d more free pages\n",
  5055   5092                    *pPgno, closest+1, k, pTrunk->pgno, n-1));
  5056   5093             rc = sqlite3PagerWrite(pTrunk->pDbPage);
  5057   5094             if( rc ) goto end_allocate_page;
................................................................................
  7115   7152       }
  7116   7153       assert( pgnoRoot>=3 );
  7117   7154   
  7118   7155       /* Allocate a page. The page that currently resides at pgnoRoot will
  7119   7156       ** be moved to the allocated page (unless the allocated page happens
  7120   7157       ** to reside at pgnoRoot).
  7121   7158       */
  7122         -    rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, 1);
         7159  +    rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT);
  7123   7160       if( rc!=SQLITE_OK ){
  7124   7161         return rc;
  7125   7162       }
  7126   7163   
  7127   7164       if( pgnoMove!=pgnoRoot ){
  7128   7165         /* pgnoRoot is the page that will be used for the root-page of
  7129   7166         ** the new table (assuming an error did not occur). But we were