SQLite

Changes On Branch avoid-fallocate
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch avoid-fallocate Excluding Merge-Ins

This is equivalent to a diff from 49d23ef6 to 9c7523da

2013-04-26
17:00
Avoid using posix_fallocate() in WAL mode, as it is not supported by all file-systems. (check-in: 1bbb4be1 user: dan tags: trunk)
16:09
Avoid using posix_fallocate() in WAL mode, as it is not supported by all file-systems. (Closed-Leaf check-in: 9c7523da user: dan tags: avoid-fallocate)
15:55
Update documentation with new hyperlinks. No changes to code. (check-in: 640eb54a user: drh tags: trunk)
14:13
Rebalance FTS expressions after parsing to limit recursion during evaluation. Avoid recursion when deleting FTS expression trees. Enforce a limit (currently 12) on the depth of an expression tree. (check-in: 49d23ef6 user: dan tags: trunk)
14:04
Merge another trunk change. (Closed-Leaf check-in: 2648966f user: dan tags: fts3-expr-rebalance)
12:08
Reduce the default SQLITE_MAX_MMAP_SIZE slightly so that it fits in a signed 32-bit integer. (check-in: 460752b8 user: drh tags: trunk)

Changes to src/os_unix.c.

3188
3189
3190
3191
3192
3193
3194













































3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
  }else{
    pFile->lastErrno = 0; /* not a system error */
    /* Unread parts of the buffer must be zero-filled */
    memset(&((char*)pBuf)[got], 0, amt-got);
    return SQLITE_IOERR_SHORT_READ;
  }
}














































/*
** Write cnt bytes from pBuf into the file at absolute offset "offset".
** Return the number of bytes actually written, or a negative value if
** the seek or the write fails.
**
** So that a failed write does not lose its errno value, id->lastErrno
** is set before returning.  If the seek lands on an unexpected offset
** without lseek() itself reporting -1, lastErrno is set to 0.
*/
static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
  int nWritten;                   /* Result of the write system call */
#if (!defined(USE_PREAD) && !defined(USE_PREAD64))
  i64 iSeek;                      /* Offset reported by lseek() */
#endif

  /* The assert states that cnt is expected to fit in 17 bits; the mask
  ** applies the same limit unconditionally. */
  assert( cnt==(cnt&0x1ffff) );
  cnt &= 0x1ffff;
  TIMER_START;
#if defined(USE_PREAD)
  do{
    nWritten = osPwrite(id->h, pBuf, cnt, offset);
  }while( nWritten<0 && errno==EINTR );
#elif defined(USE_PREAD64)
  do{
    nWritten = osPwrite64(id->h, pBuf, cnt, offset);
  }while( nWritten<0 && errno==EINTR );
#else
  /* No positional write available: seek, then write, retrying the pair
  ** whenever the write is interrupted by a signal. */
  do{
    iSeek = lseek(id->h, offset, SEEK_SET);
    SimulateIOError( iSeek-- );
    if( iSeek!=offset ){
      id->lastErrno = (iSeek==-1) ? errno : 0;
      return -1;
    }
    nWritten = osWrite(id->h, pBuf, cnt);
  }while( nWritten<0 && errno==EINTR );
#endif
  TIMER_END;

  /* Capture errno before the trace statement below gets a chance to run. */
  if( nWritten<0 ){
    id->lastErrno = errno;
  }

  OSTRACE(("WRITE   %-3d %5d %7lld %llu\n", id->h, nWritten, offset, TIMER_ELAPSED));
  return nWritten;
}


/*
** Write data from a buffer into a file.  Return SQLITE_OK on success
** or some other error code on failure.
*/







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>









<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<







3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248






























3249


3250
3251
3252
3253
3254
3255
3256
  }else{
    pFile->lastErrno = 0; /* not a system error */
    /* Unread parts of the buffer must be zero-filled */
    memset(&((char*)pBuf)[got], 0, amt-got);
    return SQLITE_IOERR_SHORT_READ;
  }
}

/*
** Attempt to seek the file-descriptor passed as the first argument to
** absolute offset iOff, then attempt to write nBuf bytes of data from
** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise,
** return the actual number of bytes written (which may be less than
** nBuf).
**
** piErrno may be NULL, in which case no error number is reported.
** When the seek fails, *piErrno receives errno if lseek() returned -1
** and 0 if it returned some other unexpected offset.
*/
static int seekAndWriteFd(
  int fd,                         /* File descriptor to write to */
  i64 iOff,                       /* File offset to begin writing at */
  const void *pBuf,               /* Copy data from this buffer to the file */
  int nBuf,                       /* Size of buffer pBuf in bytes */
  int *piErrno                    /* OUT: Error number if error occurs */
){
  int rc = 0;                     /* Value returned by system call */

  /* The assert states that nBuf is expected to fit in 17 bits; the mask
  ** applies the same limit unconditionally. */
  assert( nBuf==(nBuf&0x1ffff) );
  nBuf &= 0x1ffff;
  TIMER_START;

#if defined(USE_PREAD)
  do{ rc = osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR );
#elif defined(USE_PREAD64)
  do{ rc = osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR);
#else
  do{
    i64 iSeek = lseek(fd, iOff, SEEK_SET);
    SimulateIOError( iSeek-- );

    if( iSeek!=iOff ){
      if( piErrno ) *piErrno = (iSeek==-1 ? errno : 0);
      return -1;
    }
    rc = osWrite(fd, pBuf, nBuf);
  }while( rc<0 && errno==EINTR );
#endif

  /* Save errno immediately after the failed system call.  The timing and
  ** tracing macros below are defined elsewhere and may run code that
  ** overwrites errno, so it must not be read after them. */
  if( rc<0 && piErrno ) *piErrno = errno;

  TIMER_END;
  OSTRACE(("WRITE   %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED));

  return rc;
}


/*
** Write cnt bytes from pBuf into the file id at absolute offset "offset"
** by delegating to seekAndWriteFd() on the file's descriptor.  The return
** value is whatever seekAndWriteFd() returns.
**
** id->lastErrno is passed as the error-number output, so that a failed
** write does not lose its errno value.
*/
static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
  int *pErrOut = &id->lastErrno;  /* Where the error number is stored */
  return seekAndWriteFd(id->h, offset, pBuf, cnt, pErrOut);
}


/*
** Write data from a buffer into a file.  Return SQLITE_OK on success
** or some other error code on failure.
*/
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335








4336


4337





4338
4339
4340
4341
4342


4343
4344
4345
4346
4347
4348
4349
        rc = SQLITE_IOERR_SHMSIZE;
        goto shmpage_out;
      }
  
      if( sStat.st_size<nByte ){
        /* The requested memory region does not exist. If bExtend is set to
        ** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
        **
        ** Alternatively, if bExtend is true, use ftruncate() to allocate
        ** the requested memory region.
        */
        if( !bExtend ) goto shmpage_out;
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
        if( osFallocate(pShmNode->h, sStat.st_size, nByte)!=0 ){
          rc = unixLogError(SQLITE_IOERR_SHMSIZE, "fallocate",
                            pShmNode->zFilename);
          goto shmpage_out;
        }








#else


        if( robust_ftruncate(pShmNode->h, nByte) ){





          rc = unixLogError(SQLITE_IOERR_SHMSIZE, "ftruncate",
                            pShmNode->zFilename);
          goto shmpage_out;
        }
#endif


      }
    }

    /* Map the requested memory region into this processes address space. */
    apNew = (char **)sqlite3_realloc(
        pShmNode->apRegion, (iRegion+1)*sizeof(char *)
    );







<
<
<

|
<
<
<
<


>
>
>
>
>
>
>
>
|
>
>
|
>
>
>
>
>
|
<
|
|
<
>
>







4331
4332
4333
4334
4335
4336
4337



4338
4339




4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359

4360
4361

4362
4363
4364
4365
4366
4367
4368
4369
4370
        rc = SQLITE_IOERR_SHMSIZE;
        goto shmpage_out;
      }
  
      if( sStat.st_size<nByte ){
        /* The requested memory region does not exist. If bExtend is set to
        ** false, exit early. *pp will be set to NULL and SQLITE_OK returned.



        */
        if( !bExtend ){




          goto shmpage_out;
        }

        /* Alternatively, if bExtend is true, extend the file. Do this by
        ** writing a single byte to the end of each (OS) page being
        ** allocated or extended. Technically, we need only write to the
        ** last page in order to extend the file. But writing to all new
        ** pages forces the OS to allocate them immediately, which reduces
        ** the chances of SIGBUS while accessing the mapped region later on.
        */
        else{
          static const int pgsz = 4096;
          int iPg;

          /* Write to the last byte of each newly allocated or extended page */
          assert( (nByte % pgsz)==0 );
          for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){
            if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, 0)!=1 ){
              const char *zFile = pShmNode->zFilename;
              rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile);

              goto shmpage_out;
            }

          }
        }
      }
    }

    /* Map the requested memory region into this processes address space. */
    apNew = (char **)sqlite3_realloc(
        pShmNode->apRegion, (iRegion+1)*sizeof(char *)
    );