SQLite

Check-in [6f3a5c24d2]


Overview
Comment: Add an experimental fix to avoid attempting to mmap memory from an offset that is not a multiple of the system page size on systems with page sizes larger than 32KB.
SHA1: 6f3a5c24d254fc6faf607b505bdef4a7aafc21af
User & Date: dan 2014-03-20 08:59:47.455
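
Background note (not part of this check-in): POSIX mmap() requires the file offset to be a multiple of the page size reported by sysconf(_SC_PAGESIZE). The wal-index is mapped in fixed 32KB regions, so on a system with 64KB pages an attempt to map region 1 at byte offset 32768 fails with EINVAL. The standalone sketch below (hypothetical file name, illustration only) reproduces the failing case that this check-in works around by mapping several regions at once.

/* Illustration only: a 32KB offset is rejected by mmap() when the
** system page size is larger than 32KB. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void){
  long pgsz = sysconf(_SC_PAGESIZE);     /* system page size */
  long szRegion = 32*1024;               /* SQLite shm region size */
  int fd = open("shm-demo", O_RDWR|O_CREAT, 0644);  /* hypothetical file */
  if( fd<0 ) return 1;
  ftruncate(fd, 2*szRegion);
  void *p = mmap(0, szRegion, PROT_READ|PROT_WRITE, MAP_SHARED, fd, szRegion);
  if( p==MAP_FAILED ){
    /* Expected (EINVAL) whenever szRegion is not a multiple of pgsz. */
    printf("mmap at offset %ld failed, errno=%d, page size=%ld\n",
           szRegion, errno, pgsz);
  }else{
    printf("mmap at offset %ld succeeded (page size %ld divides it)\n",
           szRegion, pgsz);
    munmap(p, szRegion);
  }
  close(fd);
  return 0;
}
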
Context
2014-03-20 09:42  Add a test to ensure os_unix.c works with 64KiB OS pages. (Closed-Leaf check-in: e3d2be3ba4 user: dan tags: shm-mapping-fix)
2014-03-20 08:59  Add an experimental fix to avoid attempting to mmap memory from an offset that is not a multiple of the system page size on systems with page sizes larger than 32KB. (check-in: 6f3a5c24d2 user: dan tags: shm-mapping-fix)
2014-03-19 20:01  Avoid some unnecessary calls to sqlite3VdbeRecordUnpack() that were being made when merging data from two or more temp files together in vdbesort.c (check-in: 707ea170b3 user: dan tags: trunk)
Changes
Changes to src/os_unix.c.

Old version (lines 4101-4129):
           pShmNode->sharedMask, pShmNode->exclMask));
  }
#endif

  return rc;        
}

/*
** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
**
** This is not a VFS shared-memory method; it is a utility function called
** by VFS shared-memory methods.
*/
static void unixShmPurge(unixFile *pFd){
  unixShmNode *p = pFd->pInode->pShmNode;
  assert( unixMutexHeld() );
  if( p && p->nRef==0 ){
    int i;
    assert( p->pInode==pFd->pInode );
    sqlite3_mutex_free(p->mutex);
    for(i=0; i<p->nRegion; i++){
      if( p->h>=0 ){
        osMunmap(p->apRegion[i], p->szRegion);
      }else{
        sqlite3_free(p->apRegion[i]);
      }
    }
    sqlite3_free(p->apRegion);







New version (lines 4101-4157):
           pShmNode->sharedMask, pShmNode->exclMask));
  }
#endif

  return rc;        
}

/*
** Return the system page size.
*/
static int unixGetPagesize(void){
#if defined(_BSD_SOURCE)
  return getpagesize();
#else
  return (int)sysconf(_SC_PAGESIZE);
#endif
}

/*
** Return the minimum number of 32KB shm regions that should be mapped at
** a time, assuming that each mapping must be an integer multiple of the
** current system page-size.
**
** Usually, this is 1. The exception seems to be systems that are configured
** to use 64KB pages - in this case each mapping must cover at least two
** shm regions.
*/
static int unixShmRegionPerMap(void){
  int shmsz = 32*1024;            /* SHM region size */
  int pgsz = unixGetPagesize();   /* System page size */
  assert( ((pgsz-1)&pgsz)==0 );   /* Page size must be a power of 2 */
  if( pgsz<shmsz ) return 1;
  return pgsz/shmsz;
}

/*
** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
**
** This is not a VFS shared-memory method; it is a utility function called
** by VFS shared-memory methods.
*/
static void unixShmPurge(unixFile *pFd){
  unixShmNode *p = pFd->pInode->pShmNode;
  assert( unixMutexHeld() );
  if( p && p->nRef==0 ){
    int nShmPerMap = unixShmRegionPerMap();
    int i;
    assert( p->pInode==pFd->pInode );
    sqlite3_mutex_free(p->mutex);
    for(i=0; i<p->nRegion; i+=nShmPerMap){
      if( p->h>=0 ){
        osMunmap(p->apRegion[i], p->szRegion);
      }else{
        sqlite3_free(p->apRegion[i]);
      }
    }
    sqlite3_free(p->apRegion);
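
The new helper above decides how many 32KB regions each OS-level mapping must cover. As a quick standalone check of the arithmetic (illustration only, with the page size passed in as a parameter rather than read from the OS): a 4KB or 32KB page size gives 1 region per mapping, while a 64KB page size gives 2, which is why unixShmPurge() now advances through apRegion[] in steps of nShmPerMap.

/* Illustration only: the regions-per-mapping arithmetic from
** unixShmRegionPerMap(), parameterised by page size. */
#include <assert.h>
#include <stdio.h>

static int regionsPerMap(int pgsz){
  int shmsz = 32*1024;            /* SHM region size */
  assert( ((pgsz-1)&pgsz)==0 );   /* page size must be a power of 2 */
  if( pgsz<shmsz ) return 1;
  return pgsz/shmsz;
}

int main(void){
  printf("%d\n", regionsPerMap(4096));     /* 1 */
  printf("%d\n", regionsPerMap(32*1024));  /* 1 */
  printf("%d\n", regionsPerMap(64*1024));  /* 2 */
  return 0;
}
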

Old version (lines 4322-4353):
  int bExtend,                    /* True to extend file if necessary */
  void volatile **pp              /* OUT: Mapped memory */
){
  unixFile *pDbFd = (unixFile*)fd;
  unixShm *p;
  unixShmNode *pShmNode;
  int rc = SQLITE_OK;

  /* If the shared-memory file has not yet been opened, open it now. */
  if( pDbFd->pShm==0 ){
    rc = unixOpenSharedMemory(pDbFd);
    if( rc!=SQLITE_OK ) return rc;
  }

  p = pDbFd->pShm;
  pShmNode = p->pShmNode;
  sqlite3_mutex_enter(pShmNode->mutex);
  assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
  assert( pShmNode->pInode==pDbFd->pInode );
  assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
  assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );

  if( pShmNode->nRegion<=iRegion ){
    char **apNew;                      /* New apRegion[] array */
    int nByte = (iRegion+1)*szRegion;  /* Minimum required file size */
    struct stat sStat;                 /* Used by fstat() */

    pShmNode->szRegion = szRegion;

    if( pShmNode->h>=0 ){
      /* The requested region is not mapped into this processes address space.
      ** Check to see if it has been allocated (i.e. if the wal-index file is







New version (lines 4350-4386):
  int bExtend,                    /* True to extend file if necessary */
  void volatile **pp              /* OUT: Mapped memory */
){
  unixFile *pDbFd = (unixFile*)fd;
  unixShm *p;
  unixShmNode *pShmNode;
  int rc = SQLITE_OK;
  int nShmPerMap = unixShmRegionPerMap();
  int nReqRegion;

  /* If the shared-memory file has not yet been opened, open it now. */
  if( pDbFd->pShm==0 ){
    rc = unixOpenSharedMemory(pDbFd);
    if( rc!=SQLITE_OK ) return rc;
  }

  p = pDbFd->pShm;
  pShmNode = p->pShmNode;
  sqlite3_mutex_enter(pShmNode->mutex);
  assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
  assert( pShmNode->pInode==pDbFd->pInode );
  assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
  assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );

  /* Minimum number of regions required to be mapped. */
  nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;

  if( pShmNode->nRegion<nReqRegion ){
    char **apNew;                      /* New apRegion[] array */
    int nByte = nReqRegion*szRegion;   /* Minimum required file size */
    struct stat sStat;                 /* Used by fstat() */

    pShmNode->szRegion = szRegion;

    if( pShmNode->h>=0 ){
      /* The requested region is not mapped into this processes address space.
      ** Check to see if it has been allocated (i.e. if the wal-index file is
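
The nReqRegion formula above rounds the number of required regions (iRegion+1) up to the next multiple of nShmPerMap, so that every mapping starts at a page-aligned offset. A standalone check of the rounding (illustration only, not code from the check-in):

/* Illustration only: round (iRegion+1) up to a multiple of nShmPerMap. */
#include <assert.h>

static int reqRegion(int iRegion, int nShmPerMap){
  return ((iRegion + nShmPerMap) / nShmPerMap) * nShmPerMap;
}

int main(void){
  /* 4KB pages: one region per mapping, so exactly iRegion+1 regions. */
  assert( reqRegion(0,1)==1 && reqRegion(5,1)==6 );
  /* 64KB pages: two regions per mapping, so the count rounds up to even. */
  assert( reqRegion(0,2)==2 && reqRegion(1,2)==2 && reqRegion(2,2)==4 );
  return 0;
}
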

Old version (lines 4388-4429):
          }
        }
      }
    }

    /* Map the requested memory region into this processes address space. */
    apNew = (char **)sqlite3_realloc(
        pShmNode->apRegion, (iRegion+1)*sizeof(char *)
    );
    if( !apNew ){
      rc = SQLITE_IOERR_NOMEM;
      goto shmpage_out;
    }
    pShmNode->apRegion = apNew;
    while(pShmNode->nRegion<=iRegion){
      void *pMem;
      if( pShmNode->h>=0 ){
        pMem = osMmap(0, szRegion,
            pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, 
            MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
        );
        if( pMem==MAP_FAILED ){
          rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
          goto shmpage_out;
        }
      }else{
        pMem = sqlite3_malloc(szRegion);
        if( pMem==0 ){
          rc = SQLITE_NOMEM;
          goto shmpage_out;
        }
        memset(pMem, 0, szRegion);
      }

      pShmNode->apRegion[pShmNode->nRegion] = pMem;
      pShmNode->nRegion++;
    }
  }

shmpage_out:
  if( pShmNode->nRegion>iRegion ){
    *pp = pShmNode->apRegion[iRegion];
  }else{







New version (lines 4421-4467):
          }
        }
      }
    }

    /* Map the requested memory region into this processes address space. */
    apNew = (char **)sqlite3_realloc(
        pShmNode->apRegion, nReqRegion*sizeof(char *)
    );
    if( !apNew ){
      rc = SQLITE_IOERR_NOMEM;
      goto shmpage_out;
    }
    pShmNode->apRegion = apNew;
    while( pShmNode->nRegion<nReqRegion ){
      int nMap = szRegion*nShmPerMap;
      int i;
      void *pMem;
      if( pShmNode->h>=0 ){
        pMem = osMmap(0, nMap,
            pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, 
            MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
        );
        if( pMem==MAP_FAILED ){
          rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
          goto shmpage_out;
        }
      }else{
        pMem = sqlite3_malloc(szRegion);
        if( pMem==0 ){
          rc = SQLITE_NOMEM;
          goto shmpage_out;
        }
        memset(pMem, 0, szRegion);
      }

      for(i=0; i<nShmPerMap; i++){
        pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i];
      }
      pShmNode->nRegion += nShmPerMap;
    }
  }

shmpage_out:
  if( pShmNode->nRegion>iRegion ){
    *pp = pShmNode->apRegion[iRegion];
  }else{
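
In the loop above, a single mmap() of nMap = szRegion*nShmPerMap bytes now backs nShmPerMap consecutive apRegion[] entries, each pointing szRegion bytes further into the same mapping. A standalone sketch of that pointer arithmetic (illustration only, with malloc() standing in for the real mmap()):

/* Illustration only: carve one contiguous block into per-region pointers,
** mirroring the apRegion[] bookkeeping above. */
#include <stdio.h>
#include <stdlib.h>

int main(void){
  const int szRegion = 32*1024;
  const int nShmPerMap = 2;                  /* e.g. 64KB system pages */
  char *apRegion[2];
  char *pMem = malloc(szRegion*nShmPerMap);  /* stands in for one mmap() */
  int i;
  if( pMem==0 ) return 1;
  for(i=0; i<nShmPerMap; i++){
    apRegion[i] = &pMem[szRegion*i];
  }
  /* apRegion[0] and apRegion[1] are the two 32KB halves of one 64KB block. */
  printf("region 1 starts %d bytes after region 0\n",
         (int)(apRegion[1]-apRegion[0]));
  free(pMem);
  return 0;
}
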

Old version (lines 4629-4655):
    osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);
    pFd->pMapRegion = 0;
    pFd->mmapSize = 0;
    pFd->mmapSizeActual = 0;
  }
}

/*
** Return the system page size.
*/
static int unixGetPagesize(void){
#if HAVE_MREMAP
  return 512;
#elif defined(_BSD_SOURCE)
  return getpagesize();
#else
  return (int)sysconf(_SC_PAGESIZE);
#endif
}

/*
** Attempt to set the size of the memory mapping maintained by file 
** descriptor pFd to nNew bytes. Any existing mapping is discarded.
**
** If successful, this function sets the following variables:
**
**       unixFile.pMapRegion







New version (lines 4667-4680):
    osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);
    pFd->pMapRegion = 0;
    pFd->mmapSize = 0;
    pFd->mmapSizeActual = 0;
  }
}

/*
** Attempt to set the size of the memory mapping maintained by file 
** descriptor pFd to nNew bytes. Any existing mapping is discarded.
**
** If successful, this function sets the following variables:
**
**       unixFile.pMapRegion

Old version (lines 4678-4693):
  assert( nNew>0 );
  assert( pFd->mmapSizeActual>=pFd->mmapSize );
  assert( MAP_FAILED!=0 );

  if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;

  if( pOrig ){
    const int szSyspage = unixGetPagesize();
    i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
    u8 *pReq = &pOrig[nReuse];

    /* Unmap any pages of the existing mapping that cannot be reused. */
    if( nReuse!=nOrig ){
      osMunmap(pReq, nOrig-nReuse);
    }








New version (lines 4703-4722):
  assert( nNew>0 );
  assert( pFd->mmapSizeActual>=pFd->mmapSize );
  assert( MAP_FAILED!=0 );

  if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;

  if( pOrig ){
#if HAVE_MREMAP
    i64 nReuse = pFd->mmapSize;
#else
    const int szSyspage = unixGetPagesize();
    i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
#endif
    u8 *pReq = &pOrig[nReuse];

    /* Unmap any pages of the existing mapping that cannot be reused. */
    if( nReuse!=nOrig ){
      osMunmap(pReq, nOrig-nReuse);
    }
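
With mremap() available the whole existing mapping is reused; otherwise the reusable prefix is the current mapping size rounded down to a page boundary by the bitmask above, and only the trailing part is unmapped and remapped. A quick standalone check of that rounding (example numbers only):

/* Illustration only: round a mapping size down to a page boundary. */
#include <stdio.h>

int main(void){
  long long mmapSize = 1000000;        /* example current mapping size */
  long long szSyspage = 4096;          /* example system page size */
  long long nReuse = mmapSize & ~(szSyspage-1);
  /* Prints: reuse 999424 bytes, unmap the trailing 576 */
  printf("reuse %lld bytes, unmap the trailing %lld\n",
         nReuse, mmapSize-nReuse);
  return 0;
}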