/ Check-in [a3a9a2e1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Have this branch maintain an in-memory hash-table of old pages for read-only MVCC clients. There is no way to access it yet.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | server-process-edition
Files: files | file ages | folders
SHA3-256: a3a9a2e1899cc963315590ef4666972e9d92986843706a551962ed16661a19b2
User & Date: dan 2017-07-07 16:12:45
Context
2017-07-07
16:40
Merge latest trunk changes with this branch. check-in: 216c757f user: dan tags: server-process-edition
16:12
Have this branch maintain an in-memory hash-table of old pages for read-only MVCC clients. There is no way to access it yet. check-in: a3a9a2e1 user: dan tags: server-process-edition
2017-06-28
20:21
Merge tserver fixes with this branch. check-in: 58a0aab8 user: dan tags: server-process-edition
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/pager.c.

   704    704     PCache *pPCache;            /* Pointer to page cache object */
   705    705   #ifndef SQLITE_OMIT_WAL
   706    706     Wal *pWal;                  /* Write-ahead log used by "journal_mode=wal" */
   707    707     char *zWal;                 /* File name for write-ahead log */
   708    708   #endif
   709    709   #ifdef SQLITE_SERVER_EDITION
   710    710     Server *pServer;
          711  +  ServerPage *pServerPage;
   711    712   #endif
   712    713   };
   713    714   
   714    715   /*
   715    716   ** Indexes for use with Pager.aStat[]. The Pager.aStat[] array contains
   716    717   ** the values accessed by passing SQLITE_DBSTATUS_CACHE_HIT, CACHE_MISS 
   717    718   ** or CACHE_WRITE to sqlite3_db_status().
................................................................................
  1785   1786         testcase( rc==SQLITE_NOMEM );
  1786   1787         assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
  1787   1788       }
  1788   1789     }
  1789   1790     return rc;
  1790   1791   }
  1791   1792   
         1793  +#ifdef SQLITE_SERVER_EDITION
         1794  +static void pagerFreeServerPage(Pager *pPager){
         1795  +  ServerPage *pPg;
         1796  +  ServerPage *pNext;
         1797  +  for(pPg=pPager->pServerPage; pPg; pPg=pNext){
         1798  +    pNext = pPg->pNext;
         1799  +    sqlite3_free(pPg);
         1800  +  }
         1801  +  pPager->pServerPage = 0;
         1802  +}
         1803  +#endif
         1804  +
  1792   1805   /*
  1793   1806   ** This function is a no-op if the pager is in exclusive mode and not
  1794   1807   ** in the ERROR state. Otherwise, it switches the pager to PAGER_OPEN
  1795   1808   ** state.
  1796   1809   **
  1797   1810   ** If the pager is not in exclusive-access mode, the database file is
  1798   1811   ** completely unlocked. If the file is unlocked and the file-system does
................................................................................
  1815   1828   
  1816   1829     sqlite3BitvecDestroy(pPager->pInJournal);
  1817   1830     pPager->pInJournal = 0;
  1818   1831     releaseAllSavepoints(pPager);
  1819   1832   
  1820   1833   #ifdef SQLITE_SERVER_EDITION
  1821   1834     if( pagerIsServer(pPager) ){
         1835  +    pagerFreeServerPage(pPager);
  1822   1836       sqlite3ServerEnd(pPager->pServer);
  1823   1837       pPager->eState = PAGER_OPEN;
  1824   1838     }else 
  1825   1839   #endif
  1826   1840     if( pagerUseWal(pPager) ){
  1827   1841       assert( !isOpen(pPager->jfd) );
  1828   1842       sqlite3WalEndReadTransaction(pPager->pWal);
................................................................................
  4354   4368     int rc = SQLITE_OK;                  /* Return code */
  4355   4369   
  4356   4370     /* This function is only called for rollback pagers in WRITER_DBMOD state. */
  4357   4371     assert( !pagerUseWal(pPager) );
  4358   4372     assert( pPager->tempFile || pPager->eState==PAGER_WRITER_DBMOD );
  4359   4373     assert( pPager->eLock==EXCLUSIVE_LOCK );
  4360   4374     assert( isOpen(pPager->fd) || pList->pDirty==0 );
         4375  +
         4376  +#ifdef SQLITE_SERVER_EDITION
         4377  +  if( pagerIsServer(pPager) ){
         4378  +    rc = sqlite3ServerPreCommit(pPager->pServer, pPager->pServerPage);
         4379  +    pPager->pServerPage = 0;
         4380  +    if( rc!=SQLITE_OK ) return rc;
         4381  +  }
         4382  +#endif
  4361   4383   
  4362   4384     /* If the file is a temp-file that has not yet been opened, open it now. It
  4363   4385     ** is not possible for rc to be other than SQLITE_OK if this branch
  4364   4386     ** is taken, as pager_wait_on_lock() is a no-op for temp-files.
  4365   4387     */
  4366   4388     if( !isOpen(pPager->fd) ){
  4367   4389       assert( pPager->tempFile && rc==SQLITE_OK );
................................................................................
  4531   4553   ** page clean, the IO error code is returned. If the page cannot be
  4532   4554   ** made clean for some other reason, but no error occurs, then SQLITE_OK
  4533   4555   ** is returned but sqlite3PcacheMakeClean() is not called.
  4534   4556   */
  4535   4557   static int pagerStress(void *p, PgHdr *pPg){
  4536   4558     Pager *pPager = (Pager *)p;
  4537   4559     int rc = SQLITE_OK;
         4560  +
         4561  +  if( pagerIsServer(pPager) ) return SQLITE_OK;
  4538   4562   
  4539   4563     assert( pPg->pPager==pPager );
  4540   4564     assert( pPg->flags&PGHDR_DIRTY );
  4541   4565   
  4542   4566     /* The doNotSpill NOSYNC bit is set during times when doing a sync of
  4543   4567     ** journal (and adding a new header) is not allowed.  This occurs
  4544   4568     ** during calls to sqlite3PagerWrite() while trying to journal multiple
................................................................................
  5904   5928   */
  5905   5929   static SQLITE_NOINLINE int pagerAddPageToRollbackJournal(PgHdr *pPg){
  5906   5930     Pager *pPager = pPg->pPager;
  5907   5931     int rc;
  5908   5932     u32 cksum;
  5909   5933     char *pData2;
  5910   5934     i64 iOff = pPager->journalOff;
         5935  +
         5936  +#ifdef SQLITE_SERVER_EDITION
         5937  +  if( pagerIsServer(pPager) ){
         5938  +    int nByte = sizeof(ServerPage) + pPager->pageSize;
         5939  +    ServerPage *p = (ServerPage*)sqlite3_malloc(nByte);
         5940  +    if( !p ) return SQLITE_NOMEM_BKPT;
         5941  +    memset(p, 0, sizeof(ServerPage));
         5942  +    p->aData = (u8*)&p[1];
         5943  +    p->nData = pPager->pageSize;
         5944  +    p->pgno = pPg->pgno;
         5945  +    p->pNext = pPager->pServerPage;
         5946  +    pPager->pServerPage = p;
         5947  +  }
         5948  +#endif
  5911   5949   
  5912   5950     /* We should never write to the journal file the page that
  5913   5951     ** contains the database locks.  The following assert verifies
  5914   5952     ** that we do not. */
  5915   5953     assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
  5916   5954   
  5917   5955     assert( pPager->journalHdr<=pPager->journalOff );

Changes to src/server.c.

     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   *************************************************************************
    12     12   */
    13     13   
    14     14   #include "sqliteInt.h"
    15     15   
           16  +#ifdef SQLITE_SERVER_EDITION
           17  +
    16     18   /*
    17     19   ** Page-locking slot format:
    18     20   **
    19     21   **   Assuming HMA_MAX_TRANSACTIONID is set to 16.
    20     22   **
    21     23   **   The least-significant 16 bits are used for read locks. When a read
    22     24   **   lock is taken, the client sets the bit associated with its 
    23     25   **   transaction-id.
    24     26   **
    25         -**   The next 8 bits are set to the number of transient-read locks 
    26         -**   currently held on the page.
    27         -**
    28     27   **   The next 5 bits are set to 0 if no client currently holds a write
    29     28   **   lock. Or to (transaction-id + 1) if a write lock is held.
           29  +**
           30  +**   The next 8 bits are set to the number of transient-read locks 
           31  +**   currently held on the page.
    30     32   */
           33  +#define HMA_SLOT_RL_BITS 16       /* bits for Read Locks */
           34  +#define HMA_SLOT_WL_BITS 5        /* bits for Write Locks */
           35  +#define HMA_SLOT_TR_BITS 8        /* bits for Transient Reader locks */
    31     36   
    32         -#ifdef SQLITE_SERVER_EDITION
           37  +#define HMA_SLOT_RLWL_BITS (HMA_SLOT_RL_BITS + HMA_SLOT_WL_BITS)
           38  +
           39  +
           40  +#define HMA_SLOT_RL_MASK ((1 << HMA_SLOT_RL_BITS)-1)
           41  +#define HMA_SLOT_WL_MASK (((1 << HMA_SLOT_WL_BITS)-1) << HMA_SLOT_RL_BITS)
           42  +#define HMA_SLOT_TR_MASK (((1 << HMA_SLOT_TR_BITS)-1) << HMA_SLOT_RLWL_BITS)
           43  +
    33     44   
    34     45   /* Number of page-locking slots */
    35     46   #define HMA_PAGELOCK_SLOTS (256*1024)
    36     47   
    37     48   /* Maximum concurrent read/write transactions */
    38     49   #define HMA_MAX_TRANSACTIONID 16
    39     50   
           51  +
           52  +#define HMA_HASH_SIZE 512
           53  +
    40     54   /*
    41     55   ** The argument to this macro is the value of a locking slot. It returns
    42     56   ** -1 if no client currently holds the write lock, or the transaction-id
    43     57   ** of the locker otherwise.
    44     58   */
    45         -#define slotGetWriter(v) (((int)((v) >> HMA_MAX_TRANSACTIONID) & 0x1f) -1)
           59  +#define slotGetWriter(v) ((((int)(v)&HMA_SLOT_WL_MASK) >> HMA_SLOT_RL_BITS) - 1)
    46     60   
    47         -#define slotReaderMask(v) ((v) & ((1 << HMA_MAX_TRANSACTIONID)-1))
           61  +/*
           62  +** The argument to this macro is the value of a locking slot. This macro
           63  +** returns the current number of slow reader clients reading the page.
           64  +*/
           65  +#define slotGetSlowReaders(v) (((v) & HMA_SLOT_TR_MASK) >> HMA_SLOT_RLWL_BITS)
           66  +
           67  +#define slotReaderMask(v) ((v) & HMA_SLOT_RL_MASK)
    48     68   
    49     69   #include "unistd.h"
    50     70   #include "fcntl.h"
    51     71   #include "sys/mman.h"
    52     72   #include "sys/types.h"
    53     73   #include "sys/stat.h"
    54     74   #include "errno.h"
................................................................................
    78     98     u32 *aSlot;                     /* Array of page locking slots */
    79     99     i64 aFileId[2];                 /* Opaque VFS file-id */
    80    100     ServerDb *pNext;                /* Next db in this process */
    81    101   
    82    102     sqlite3_vfs *pVfs;
    83    103     ServerJournal aJrnl[HMA_MAX_TRANSACTIONID];
    84    104     u8 *aJrnlFdSpace;
          105  +
          106  +  int iNextCommit;                /* Commit id for next pre-commit call */ 
          107  +  Server *pCommit;                /* List of connections currently committing */
          108  +  Server *pReader;                /* Connections in slower-reader transaction */
          109  +  ServerPage *pPgFirst;           /* First (oldest) in list of pages */
          110  +  ServerPage *pPgLast;            /* Last (newest) in list of pages */
          111  +  ServerPage *apPg[HMA_HASH_SIZE];
    85    112   };
    86    113   
    87    114   /*
    88    115   ** One instance for each client connection open on a server mode database
    89    116   ** in this process.
    90    117   */
    91    118   struct Server {
    92    119     ServerDb *pDb;                  /* Database object */
    93    120     Pager *pPager;                  /* Associated pager object */
    94    121     int iTransId;                   /* Current transaction id (or -1) */
          122  +  int iCommitId;                  /* Current commit id (or 0) */
    95    123     int nAlloc;                     /* Allocated size of aLock[] array */
    96    124     int nLock;                      /* Number of entries in aLock[] */
    97    125     u32 *aLock;                     /* Mapped lock file */
          126  +  Server *pNext;                  /* Next in pCommit or pReader list */
    98    127   };
    99    128   
   100    129   #define SERVER_WRITE_LOCK 3
   101    130   #define SERVER_READ_LOCK  2
   102    131   #define SERVER_NO_LOCK    1
   103    132   
   104    133   /*
................................................................................
   138    167   
   139    168         if( rc==SQLITE_NOMEM ){
   140    169           sqlite3_free(p->aSlot);
   141    170           sqlite3_free(p);
   142    171           p = 0;
   143    172         }else{
   144    173           p->nClient = 1;
          174  +        p->iNextCommit = 1;
   145    175           p->aFileId[0] = aFileId[0];
   146    176           p->aFileId[1] = aFileId[1];
   147    177           p->pNext = g_server.pDb;
   148    178           g_server.pDb = p;
   149    179         }
   150    180       }else{
   151    181         rc = SQLITE_NOMEM_BKPT;
................................................................................
   348    378   }
   349    379   
   350    380   /*
   351    381   ** End a transaction (and release all locks).
   352    382   */
   353    383   int sqlite3ServerEnd(Server *p){
   354    384     int rc = SQLITE_OK;
          385  +  Server **pp;
   355    386     ServerDb *pDb = p->pDb;
          387  +  ServerPage *pFree = 0;
          388  +  ServerPage *pPg = 0;
   356    389     sqlite3_mutex_enter(pDb->mutex);
   357    390   
   358    391     serverReleaseLocks(p);
          392  +
          393  +  /* Clear the bit in the transaction mask. */
   359    394     pDb->transmask &= ~((u32)1 << p->iTransId);
          395  +
          396  +  /* If this connection is in the committers list, remove it. */
          397  +  for(pp=&pDb->pCommit; *pp; pp = &((*pp)->pNext)){
          398  +    if( *pp==p ){
          399  +      *pp = p->pNext;
          400  +      break;
          401  +    }
          402  +  }
          403  +
          404  +  /* See if it is possible to free any ServerPage records. If so, remove
          405  +  ** them from the linked list and hash table, but do not call sqlite3_free()
          406  +  ** on them until the mutex has been released.  */
          407  +  if( pDb->pPgFirst ){
          408  +    Server *pIter;
          409  +    int iOldest = 0x7FFFFFFF;
          410  +    for(pIter=pDb->pReader; pIter; pIter=pIter->pNext){
          411  +      iOldest = MIN(iOldest, pIter->iCommitId);
          412  +    }
          413  +    for(pIter=pDb->pCommit; pIter; pIter=pIter->pNext){
          414  +      iOldest = MIN(iOldest, pIter->iCommitId);
          415  +    }
          416  +
          417  +    pFree = pDb->pPgFirst;
          418  +    for(pPg=pDb->pPgFirst; pPg && pPg->iCommitId<iOldest; pPg=pPg->pNext){
          419  +      if( pPg->pHashPrev ){
          420  +        pPg->pHashPrev->pHashNext = pPg->pHashNext;
          421  +      }else{
          422  +        int iHash = pPg->pgno % HMA_HASH_SIZE;
          423  +        assert( pDb->apPg[iHash]==pPg );
          424  +        pDb->apPg[iHash] = pPg->pHashNext;
          425  +      }
          426  +      if( pPg->pHashNext ){
          427  +        pPg->pHashNext->pHashPrev = pPg->pHashPrev;
          428  +      }
          429  +    }
          430  +    if( pPg==0 ){
          431  +      pDb->pPgFirst = pDb->pPgLast = 0;
          432  +    }else{
          433  +      pDb->pPgFirst = pPg;
          434  +    }
          435  +  }
   360    436   
   361    437     sqlite3_mutex_leave(pDb->mutex);
          438  +
          439  +  /* Call sqlite3_free() on any pages that were unlinked from the hash
          440  +  ** table above. */
          441  +  while( pFree && pFree!=pPg ){
          442  +    ServerPage *pNext = pFree->pNext;
          443  +    sqlite3_free(pFree);
          444  +    pFree = pNext;
          445  +  }
          446  +
   362    447     p->iTransId = -1;
          448  +  p->iCommitId = 0;
          449  +  return rc;
          450  +}
          451  +
          452  +int sqlite3ServerPreCommit(Server *p, ServerPage *pPg){
          453  +  ServerDb *pDb = p->pDb;
          454  +  int rc = SQLITE_OK;
          455  +  ServerPage *pIter;
          456  +  ServerPage *pNext;
          457  +
          458  +  if( pPg==0 ) return SQLITE_OK;
          459  +
          460  +  sqlite3_mutex_enter(pDb->mutex);
          461  +
          462  +  /* Assign a commit id to this transaction */
          463  +  assert( p->iCommitId==0 );
          464  +  p->iCommitId = pDb->iNextCommit++;
          465  +
          466  +  /* Iterate through all pages. For each:
          467  +  **
          468  +  **   1. Set the iCommitId field.
          469  +  **   2. Add the page to the hash table.
          470  +  **   3. Wait until all slow-reader locks have cleared.
          471  +  */
          472  +  for(pIter=pPg; pIter; pIter=pIter->pNext){
          473  +    u32 *pSlot = &pDb->aSlot[pIter->pgno % HMA_PAGELOCK_SLOTS];
          474  +    int iHash = pIter->pgno % HMA_HASH_SIZE;
          475  +    pIter->iCommitId = p->iCommitId;
          476  +    pIter->pHashNext = pDb->apPg[iHash];
          477  +    if( pIter->pHashNext ){
          478  +      pIter->pHashNext->pHashPrev = pIter;
          479  +    }
          480  +    pDb->apPg[iHash] = pIter;
          481  +
          482  +    /* TODO: Something better than this! */
          483  +    while( slotGetSlowReaders(*pSlot)>0 ){
          484  +      sqlite3_mutex_leave(pDb->mutex);
          485  +      sqlite3_mutex_enter(pDb->mutex);
          486  +    }
          487  +
          488  +    /* If pIter is the last element in the list, append the new list to
          489  +    ** the ServerDb.pPgFirst/pPgLast list at this point.  */
          490  +    if( pIter->pNext==0 ){
          491  +      if( pDb->pPgLast ){
          492  +        assert( pDb->pPgFirst );
          493  +        pDb->pPgLast->pNext = pPg;
          494  +      }else{
          495  +        assert( pDb->pPgFirst==0 );
          496  +        pDb->pPgFirst = pPg;
          497  +      }
          498  +      pDb->pPgLast = pIter;
          499  +    }
          500  +  }
          501  +
          502  +  /* Add this connection to the list of current committers */
          503  +  assert( p->pNext==0 );
          504  +  p->pNext = pDb->pCommit;
          505  +  pDb->pCommit = p;
          506  +
          507  +  sqlite3_mutex_leave(pDb->mutex);
   363    508     return rc;
   364    509   }
   365    510   
   366    511   /*
   367    512   ** Release all write-locks.
   368    513   */
   369    514   int sqlite3ServerReleaseWriteLocks(Server *p){
................................................................................
   380    525   int sqlite3ServerLock(Server *p, Pgno pgno, int bWrite, int bBlock){
   381    526     int rc = SQLITE_OK;
   382    527     ServerDb *pDb = p->pDb;
   383    528     int iWriter;
   384    529     int bSkip = 0;
   385    530     u32 *pSlot;
   386    531   
          532  +  assert( p->iTransId>=0 );
   387    533     assert( p->nLock<=p->nAlloc );
   388    534     if( p->nLock==p->nAlloc ){
   389    535       int nNew = p->nLock ? p->nLock*2 : 256;
   390    536       u32 *aNew = sqlite3_realloc(p->aLock, nNew*sizeof(u32));
   391    537       if( aNew==0 ) return SQLITE_NOMEM_BKPT;
   392    538       memset(&aNew[p->nLock], 0, sizeof(u32) * (nNew - p->nLock));
   393    539       p->nAlloc = nNew;
   394    540       p->aLock = aNew;
   395    541     }
   396    542   
   397         -  assert( p->iTransId>=0 );
          543  +  sqlite3_mutex_enter(pDb->mutex);
   398    544   
   399         -  sqlite3_mutex_enter(pDb->mutex);
   400    545     pSlot = &pDb->aSlot[pgno % HMA_PAGELOCK_SLOTS];
          546  +  assert( slotGetWriter(*pSlot)<0 
          547  +       || slotReaderMask(*pSlot)==0 
          548  +       || slotReaderMask(*pSlot)==(1 << slotGetWriter(*pSlot))
          549  +  );
          550  +
   401    551     iWriter = slotGetWriter(*pSlot);
   402    552     if( iWriter==p->iTransId || (bWrite==0 && (*pSlot & (1<<p->iTransId))) ){
   403    553       bSkip = 1;
   404    554     }else if( iWriter>=0 ){
   405    555       rc = SQLITE_BUSY_DEADLOCK;
   406    556     }else if( bWrite ){
   407    557       if( (slotReaderMask(*pSlot) & ~(1 << p->iTransId))==0 ){

Changes to src/server.h.

    15     15   
    16     16   #ifndef SQLITE_SERVER_H
    17     17   #define SQLITE_SERVER_H
    18     18   
    19     19   
    20     20   typedef struct Server Server;
    21     21   
    22         -int sqlite3ServerConnect(Pager *pPager, Server **ppOut);
           22  +typedef struct ServerPage ServerPage;
           23  +struct ServerPage {
           24  +  Pgno pgno;                      /* Page number for this record */
           25  +  int nData;                      /* Size of aData[] in bytes */
           26  +  u8 *aData;
           27  +  ServerPage *pNext;
           28  +
           29  +  int iCommitId;
           30  +  ServerPage *pHashNext;
           31  +  ServerPage *pHashPrev;
           32  +};
    23     33   
           34  +int sqlite3ServerConnect(Pager *pPager, Server **ppOut);
    24     35   void sqlite3ServerDisconnect(Server *p, sqlite3_file *dbfd);
    25     36   
    26     37   int sqlite3ServerBegin(Server *p);
           38  +int sqlite3ServerPreCommit(Server*, ServerPage*);
    27     39   int sqlite3ServerEnd(Server *p);
           40  +
    28     41   int sqlite3ServerReleaseWriteLocks(Server *p);
    29     42   
    30     43   int sqlite3ServerLock(Server *p, Pgno pgno, int bWrite, int bBlock);
    31     44   
    32     45   int sqlite3ServerHasLock(Server *p, Pgno pgno, int bWrite);
    33     46   
    34     47   #endif /* SQLITE_SERVER_H */
    35         -
    36     48   #endif /* SQLITE_SERVER_EDITION */
           49  +