/ Check-in [db7d62c8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Avoid attempting to mmap memory from an offset that is not a multiple of the system page size on systems with page sizes larger than 32KB.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: db7d62c8d58eb1e8654a762c9b199ae4e2759038
User & Date: dan 2014-03-24 11:23:17
Context
2014-03-24
15:00
Fix arithmetic operators so that they do not change the affinity of their input operands. Ticket [a8a0d2996a]. check-in: 221f8f94 user: drh tags: trunk
12:33
Cancel column-cache entries that are involved in a comparison operator since the comparison might have forced an affinity change. Originally proposed as a fix for ticket [a8a0d2996a], but later determined to be incorrect. Closed-Leaf check-in: 0b95b7a8 user: drh tags: tkt-a8a0d2996a
11:23
Avoid attempting to mmap memory from an offset that is not a multiple of the system page size on systems with page sizes larger than 32KB. check-in: db7d62c8 user: dan tags: trunk
2014-03-23
16:29
Avoid a possible use of an uninitialized variable following an I/O or OOM error. check-in: 641408a1 user: drh tags: trunk
2014-03-20
09:42
Add a test to ensure os_unix.c works with 64KiB OS pages. Closed-Leaf check-in: e3d2be3b user: dan tags: shm-mapping-fix
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

   319    319   */
   320    320   static int posixFchown(int fd, uid_t uid, gid_t gid){
   321    321     return geteuid() ? 0 : fchown(fd,uid,gid);
   322    322   }
   323    323   
   324    324   /* Forward reference */
   325    325   static int openDirectory(const char*, int*);
          326  +static int unixGetpagesize(void);
   326    327   
   327    328   /*
   328    329   ** Many system calls are accessed through pointer-to-functions so that
   329    330   ** they may be overridden at runtime to facilitate fault injection during
   330    331   ** testing and sandboxing.  The following array holds the names and pointers
   331    332   ** to all overrideable system calls.
   332    333   */
................................................................................
   441    442   #if HAVE_MREMAP
   442    443     { "mremap",       (sqlite3_syscall_ptr)mremap,          0 },
   443    444   #else
   444    445     { "mremap",       (sqlite3_syscall_ptr)0,               0 },
   445    446   #endif
   446    447   #define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent)
   447    448   #endif
          449  +
          450  +  { "getpagesize",  (sqlite3_syscall_ptr)unixGetpagesize, 0 },
          451  +#define osGetpagesize ((int(*)(void))aSyscall[24].pCurrent)
   448    452   
   449    453   }; /* End of the overrideable system calls */
   450    454   
   451    455   /*
   452    456   ** This is the xSetSystemCall() method of sqlite3_vfs for all of the
   453    457   ** "unix" VFSes.  Return SQLITE_OK upon successfully updating the
   454    458   ** system call pointer, or SQLITE_NOTFOUND if there is no configurable
................................................................................
  4101   4105              pShmNode->sharedMask, pShmNode->exclMask));
  4102   4106     }
  4103   4107   #endif
  4104   4108   
  4105   4109     return rc;        
  4106   4110   }
  4107   4111   
         4112  +/*
         4113  +** Return the system page size.
         4114  +**
         4115  +** This function should not be called directly by other code in this file. 
         4116  +** Instead, it should be called via macro osGetpagesize().
         4117  +*/
         4118  +static int unixGetpagesize(void){
         4119  +#if defined(_BSD_SOURCE)
         4120  +  return getpagesize();
         4121  +#else
         4122  +  return (int)sysconf(_SC_PAGESIZE);
         4123  +#endif
         4124  +}
         4125  +
         4126  +/*
         4127  +** Return the minimum number of 32KB shm regions that should be mapped at
         4128  +** a time, assuming that each mapping must be an integer multiple of the
         4129  +** current system page-size.
         4130  +**
         4131  +** Usually, this is 1. The exception seems to be systems that are configured
         4132  +** to use 64KB pages - in this case each mapping must cover at least two
         4133  +** shm regions.
         4134  +*/
         4135  +static int unixShmRegionPerMap(void){
         4136  +  int shmsz = 32*1024;            /* SHM region size */
         4137  +  int pgsz = osGetpagesize();   /* System page size */
         4138  +  assert( ((pgsz-1)&pgsz)==0 );   /* Page size must be a power of 2 */
         4139  +  if( pgsz<shmsz ) return 1;
         4140  +  return pgsz/shmsz;
         4141  +}
  4108   4142   
  4109   4143   /*
  4110   4144   ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
  4111   4145   **
  4112   4146   ** This is not a VFS shared-memory method; it is a utility function called
  4113   4147   ** by VFS shared-memory methods.
  4114   4148   */
  4115   4149   static void unixShmPurge(unixFile *pFd){
  4116   4150     unixShmNode *p = pFd->pInode->pShmNode;
  4117   4151     assert( unixMutexHeld() );
  4118   4152     if( p && p->nRef==0 ){
         4153  +    int nShmPerMap = unixShmRegionPerMap();
  4119   4154       int i;
  4120   4155       assert( p->pInode==pFd->pInode );
  4121   4156       sqlite3_mutex_free(p->mutex);
  4122         -    for(i=0; i<p->nRegion; i++){
         4157  +    for(i=0; i<p->nRegion; i+=nShmPerMap){
  4123   4158         if( p->h>=0 ){
  4124   4159           osMunmap(p->apRegion[i], p->szRegion);
  4125   4160         }else{
  4126   4161           sqlite3_free(p->apRegion[i]);
  4127   4162         }
  4128   4163       }
  4129   4164       sqlite3_free(p->apRegion);
................................................................................
  4322   4357     int bExtend,                    /* True to extend file if necessary */
  4323   4358     void volatile **pp              /* OUT: Mapped memory */
  4324   4359   ){
  4325   4360     unixFile *pDbFd = (unixFile*)fd;
  4326   4361     unixShm *p;
  4327   4362     unixShmNode *pShmNode;
  4328   4363     int rc = SQLITE_OK;
         4364  +  int nShmPerMap = unixShmRegionPerMap();
         4365  +  int nReqRegion;
  4329   4366   
  4330   4367     /* If the shared-memory file has not yet been opened, open it now. */
  4331   4368     if( pDbFd->pShm==0 ){
  4332   4369       rc = unixOpenSharedMemory(pDbFd);
  4333   4370       if( rc!=SQLITE_OK ) return rc;
  4334   4371     }
  4335   4372   
................................................................................
  4337   4374     pShmNode = p->pShmNode;
  4338   4375     sqlite3_mutex_enter(pShmNode->mutex);
  4339   4376     assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
  4340   4377     assert( pShmNode->pInode==pDbFd->pInode );
  4341   4378     assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
  4342   4379     assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
  4343   4380   
  4344         -  if( pShmNode->nRegion<=iRegion ){
         4381  +  /* Minimum number of regions required to be mapped. */
         4382  +  nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;
         4383  +
         4384  +  if( pShmNode->nRegion<nReqRegion ){
  4345   4385       char **apNew;                      /* New apRegion[] array */
  4346         -    int nByte = (iRegion+1)*szRegion;  /* Minimum required file size */
         4386  +    int nByte = nReqRegion*szRegion;   /* Minimum required file size */
  4347   4387       struct stat sStat;                 /* Used by fstat() */
  4348   4388   
  4349   4389       pShmNode->szRegion = szRegion;
  4350   4390   
  4351   4391       if( pShmNode->h>=0 ){
  4352   4392         /* The requested region is not mapped into this process's address space.
  4353   4393         ** Check to see if it has been allocated (i.e. if the wal-index file is
................................................................................
  4388   4428             }
  4389   4429           }
  4390   4430         }
  4391   4431       }
  4392   4432   
  4393   4433       /* Map the requested memory region into this process's address space. */
  4394   4434       apNew = (char **)sqlite3_realloc(
  4395         -        pShmNode->apRegion, (iRegion+1)*sizeof(char *)
         4435  +        pShmNode->apRegion, nReqRegion*sizeof(char *)
  4396   4436       );
  4397   4437       if( !apNew ){
  4398   4438         rc = SQLITE_IOERR_NOMEM;
  4399   4439         goto shmpage_out;
  4400   4440       }
  4401   4441       pShmNode->apRegion = apNew;
  4402         -    while(pShmNode->nRegion<=iRegion){
         4442  +    while( pShmNode->nRegion<nReqRegion ){
         4443  +      int nMap = szRegion*nShmPerMap;
         4444  +      int i;
  4403   4445         void *pMem;
  4404   4446         if( pShmNode->h>=0 ){
  4405         -        pMem = osMmap(0, szRegion,
         4447  +        pMem = osMmap(0, nMap,
  4406   4448               pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, 
  4407   4449               MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
  4408   4450           );
  4409   4451           if( pMem==MAP_FAILED ){
  4410   4452             rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
  4411   4453             goto shmpage_out;
  4412   4454           }
................................................................................
  4414   4456           pMem = sqlite3_malloc(szRegion);
  4415   4457           if( pMem==0 ){
  4416   4458             rc = SQLITE_NOMEM;
  4417   4459             goto shmpage_out;
  4418   4460           }
  4419   4461           memset(pMem, 0, szRegion);
  4420   4462         }
  4421         -      pShmNode->apRegion[pShmNode->nRegion] = pMem;
  4422         -      pShmNode->nRegion++;
         4463  +
         4464  +      for(i=0; i<nShmPerMap; i++){
         4465  +        pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i];
         4466  +      }
         4467  +      pShmNode->nRegion += nShmPerMap;
  4423   4468       }
  4424   4469     }
  4425   4470   
  4426   4471   shmpage_out:
  4427   4472     if( pShmNode->nRegion>iRegion ){
  4428   4473       *pp = pShmNode->apRegion[iRegion];
  4429   4474     }else{
................................................................................
  4629   4674       osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);
  4630   4675       pFd->pMapRegion = 0;
  4631   4676       pFd->mmapSize = 0;
  4632   4677       pFd->mmapSizeActual = 0;
  4633   4678     }
  4634   4679   }
  4635   4680   
  4636         -/*
  4637         -** Return the system page size.
  4638         -*/
  4639         -static int unixGetPagesize(void){
  4640         -#if HAVE_MREMAP
  4641         -  return 512;
  4642         -#elif defined(_BSD_SOURCE)
  4643         -  return getpagesize();
  4644         -#else
  4645         -  return (int)sysconf(_SC_PAGESIZE);
  4646         -#endif
  4647         -}
  4648         -
  4649   4681   /*
  4650   4682   ** Attempt to set the size of the memory mapping maintained by file 
  4651   4683   ** descriptor pFd to nNew bytes. Any existing mapping is discarded.
  4652   4684   **
  4653   4685   ** If successful, this function sets the following variables:
  4654   4686   **
  4655   4687   **       unixFile.pMapRegion
................................................................................
  4678   4710     assert( nNew>0 );
  4679   4711     assert( pFd->mmapSizeActual>=pFd->mmapSize );
  4680   4712     assert( MAP_FAILED!=0 );
  4681   4713   
  4682   4714     if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
  4683   4715   
  4684   4716     if( pOrig ){
  4685         -    const int szSyspage = unixGetPagesize();
         4717  +#if HAVE_MREMAP
         4718  +    i64 nReuse = pFd->mmapSize;
         4719  +#else
         4720  +    const int szSyspage = osGetpagesize();
  4686   4721       i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
         4722  +#endif
  4687   4723       u8 *pReq = &pOrig[nReuse];
  4688   4724   
  4689   4725       /* Unmap any pages of the existing mapping that cannot be reused. */
  4690   4726       if( nReuse!=nOrig ){
  4691   4727         osMunmap(pReq, nOrig-nReuse);
  4692   4728       }
  4693   4729   
................................................................................
  7425   7461       UNIXVFS("unix-proxy",    proxyIoFinder ),
  7426   7462   #endif
  7427   7463     };
  7428   7464     unsigned int i;          /* Loop counter */
  7429   7465   
  7430   7466     /* Double-check that the aSyscall[] array has been constructed
  7431   7467     ** correctly.  See ticket [bb3a86e890c8e96ab] */
  7432         -  assert( ArraySize(aSyscall)==24 );
         7468  +  assert( ArraySize(aSyscall)==25 );
  7433   7469   
  7434   7470     /* Register all VFSes defined in the aVfs[] array */
  7435   7471     for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
  7436   7472       sqlite3_vfs_register(&aVfs[i], i==0);
  7437   7473     }
  7438   7474     return SQLITE_OK; 
  7439   7475   }

Changes to src/test_syscall.c.

    63     63   **
    64     64   **   test_syscall exists SYSTEM-CALL
    65     65   **     Return true if the named system call exists. Or false otherwise.
    66     66   **
    67     67   **   test_syscall list
    68     68   **     Return a list of all system calls. The list is constructed using
    69     69   **     the xNextSystemCall() VFS method.
           70  +**
           71  +**   test_syscall pagesize PGSZ
           72  +**     If PGSZ is a power of two greater than 256, install a wrapper around
           73  +**     OS function getpagesize() that reports the system page size as PGSZ.
           74  +**     Or, if PGSZ is less than zero, remove any wrapper already installed.
    70     75   */
    71     76   
    72     77   #include "sqliteInt.h"
    73     78   #include "sqlite3.h"
    74     79   #include "tcl.h"
    75     80   #include <stdlib.h>
    76     81   #include <string.h>
................................................................................
    85     90   #include <sys/types.h>
    86     91   #include <errno.h>
    87     92   
    88     93   static struct TestSyscallGlobal {
    89     94     int bPersist;                   /* 1 for persistent errors, 0 for transient */
    90     95     int nCount;                     /* Fail after this many more calls */
    91     96     int nFail;                      /* Number of failures that have occurred */
    92         -} gSyscall = { 0, 0 };
           97  +  int pgsz;
           98  +  sqlite3_syscall_ptr orig_getpagesize;
           99  +} gSyscall = { 0, 0, 0, 0, 0 };
    93    100   
    94    101   static int ts_open(const char *, int, int);
    95    102   static int ts_close(int fd);
    96    103   static int ts_access(const char *zPath, int mode);
    97    104   static char *ts_getcwd(char *zPath, size_t nPath);
    98    105   static int ts_stat(const char *zPath, struct stat *p);
    99    106   static int ts_fstat(int fd, struct stat *p);
................................................................................
   645    652       return TCL_ERROR;
   646    653     }
   647    654   
   648    655     pVfs = sqlite3_vfs_find(0);
   649    656     Tcl_SetObjResult(interp, Tcl_NewStringObj(pVfs->zName, -1));
   650    657     return TCL_OK;
   651    658   }
          659  +
          660  +static int ts_getpagesize(void){
          661  +  return gSyscall.pgsz;
          662  +}
          663  +
          664  +static int test_syscall_pagesize(
          665  +  void * clientData,
          666  +  Tcl_Interp *interp,
          667  +  int objc,
          668  +  Tcl_Obj *CONST objv[]
          669  +){
          670  +  sqlite3_vfs *pVfs = sqlite3_vfs_find(0);
          671  +  int pgsz;
          672  +  if( objc!=3 ){
          673  +    Tcl_WrongNumArgs(interp, 2, objv, "PGSZ");
          674  +    return TCL_ERROR;
          675  +  }
          676  +  if( Tcl_GetIntFromObj(interp, objv[2], &pgsz) ){
          677  +    return TCL_ERROR;
          678  +  }
          679  +
          680  +  if( pgsz<0 ){
          681  +    if( gSyscall.orig_getpagesize ){
          682  +      pVfs->xSetSystemCall(pVfs, "getpagesize", gSyscall.orig_getpagesize);
          683  +    }
          684  +  }else{
          685  +    if( pgsz<512 || (pgsz & (pgsz-1)) ){
          686  +      Tcl_AppendResult(interp, "pgsz out of range", 0);
          687  +      return TCL_ERROR;
          688  +    }
          689  +    gSyscall.orig_getpagesize = pVfs->xGetSystemCall(pVfs, "getpagesize");
          690  +    gSyscall.pgsz = pgsz;
          691  +    pVfs->xSetSystemCall(
          692  +        pVfs, "getpagesize", (sqlite3_syscall_ptr)ts_getpagesize
          693  +    );
          694  +  }
          695  +
          696  +  return TCL_OK;
          697  +}
   652    698   
   653    699   static int test_syscall(
   654    700     void * clientData,
   655    701     Tcl_Interp *interp,
   656    702     int objc,
   657    703     Tcl_Obj *CONST objv[]
   658    704   ){
................................................................................
   664    710       { "install",    test_syscall_install },
   665    711       { "uninstall",  test_syscall_uninstall },
   666    712       { "reset",      test_syscall_reset },
   667    713       { "errno",      test_syscall_errno },
   668    714       { "exists",     test_syscall_exists },
   669    715       { "list",       test_syscall_list },
   670    716       { "defaultvfs", test_syscall_defaultvfs },
          717  +    { "pagesize",   test_syscall_pagesize },
   671    718       { 0, 0 }
   672    719     };
   673    720     int iCmd;
   674    721     int rc;
   675    722   
   676    723     if( objc<2 ){
   677    724       Tcl_WrongNumArgs(interp, 1, objv, "SUB-COMMAND ...");

Changes to test/syscall.test.

    57     57   # Tests for the xNextSystemCall method.
    58     58   #
    59     59   foreach s {
    60     60       open close access getcwd stat fstat ftruncate
    61     61       fcntl read pread write pwrite fchmod fallocate
    62     62       pread64 pwrite64 unlink openDirectory mkdir rmdir 
    63     63       statvfs fchown umask mmap munmap mremap
           64  +    getpagesize
    64     65   } {
    65     66     if {[test_syscall exists $s]} {lappend syscall_list $s}
    66     67   }
    67     68   do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list]
    68     69   
    69     70   #-------------------------------------------------------------------------
    70     71   # This test verifies that if a call to open() fails and errno is set to

Added test/wal64k.test.

            1  +# 2010 April 13
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library.  The
           12  +# focus of this file is testing the operation of the library in
           13  +# "PRAGMA journal_mode=WAL" mode.
           14  +#
           15  +
           16  +set testdir [file dirname $argv0]
           17  +source $testdir/tester.tcl
           18  +set testprefix wal64k
           19  +
           20  +ifcapable !wal {finish_test ; return }
           21  +
           22  +db close
           23  +test_syscall pagesize 65536
           24  +sqlite3 db test.db
           25  +
           26  +do_execsql_test 1.0 { 
           27  +  PRAGMA journal_mode = WAL;
           28  +  CREATE TABLE t1(x);
           29  +  CREATE INDEX i1 ON t1(x);
           30  +} {wal}
           31  +do_test 1.1 { file size test.db-shm } {65536}
           32  +
           33  +do_test 1.2 {
           34  +  execsql BEGIN
           35  +  while {[file size test.db-shm]==65536} {
           36  +    execsql { INSERT INTO t1 VALUES( randstr(900,1100) ) }
           37  +  }
           38  +  execsql COMMIT
           39  +  file size test.db-shm
           40  +} {131072}
           41  +
           42  +integrity_check 1.3
           43  +
           44  +db close
           45  +test_syscall pagesize -1
           46  +finish_test
           47  +