/ Check-in [9c7523da]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Avoid using posix_fallocate() in WAL mode, as it is not supported by all file-systems.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | avoid-fallocate
Files: files | file ages | folders
SHA1:9c7523dabf4aee609287732ce787c9b9a9087e7f
User & Date: dan 2013-04-26 16:09:29
Context
2013-04-26
17:00
Avoid using posix_fallocate() in WAL mode, as it is not supported by all file-systems. check-in: 1bbb4be1 user: dan tags: trunk
16:09
Avoid using posix_fallocate() in WAL mode, as it is not supported by all file-systems. Closed-Leaf check-in: 9c7523da user: dan tags: avoid-fallocate
14:13
Rebalance FTS expressions after parsing to limit recursion during evaluation. Avoid recursion when deleting FTS expression trees. Enforce a limit (currently 12) on the depth of an expression tree. check-in: 49d23ef6 user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

  3188   3188     }else{
  3189   3189       pFile->lastErrno = 0; /* not a system error */
  3190   3190       /* Unread parts of the buffer must be zero-filled */
  3191   3191       memset(&((char*)pBuf)[got], 0, amt-got);
  3192   3192       return SQLITE_IOERR_SHORT_READ;
  3193   3193     }
  3194   3194   }
         3195  +
         3196  +/*
         3197  +** Attempt to seek the file-descriptor passed as the first argument to
         3198  +** absolute offset iOff, then attempt to write nBuf bytes of data from
         3199  +** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, 
         3200  +** return the actual number of bytes written (which may be less than
         3201  +** nBuf).
         3202  +*/
         3203  +static int seekAndWriteFd(
         3204  +  int fd,                         /* File descriptor to write to */
         3205  +  i64 iOff,                       /* File offset to begin writing at */
         3206  +  const void *pBuf,               /* Copy data from this buffer to the file */
         3207  +  int nBuf,                       /* Size of buffer pBuf in bytes */
         3208  +  int *piErrno                    /* OUT: Error number if error occurs */
         3209  +){
         3210  +  int rc = 0;                     /* Value returned by system call */
         3211  +
         3212  +  assert( nBuf==(nBuf&0x1ffff) );
         3213  +  nBuf &= 0x1ffff;
         3214  +  TIMER_START;
         3215  +
         3216  +#if defined(USE_PREAD)
         3217  +  do{ rc = osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR );
         3218  +#elif defined(USE_PREAD64)
         3219  +  do{ rc = osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR);
         3220  +#else
         3221  +  do{
         3222  +    i64 iSeek = lseek(fd, iOff, SEEK_SET);
         3223  +    SimulateIOError( iSeek-- );
         3224  +
         3225  +    if( iSeek!=iOff ){
         3226  +      if( piErrno ) *piErrno = (iSeek==-1 ? errno : 0);
         3227  +      return -1;
         3228  +    }
         3229  +    rc = osWrite(fd, pBuf, nBuf);
         3230  +  }while( rc<0 && errno==EINTR );
         3231  +#endif
         3232  +
         3233  +  TIMER_END;
         3234  +  OSTRACE(("WRITE   %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED));
         3235  +
         3236  +  if( rc<0 && piErrno ) *piErrno = errno;
         3237  +  return rc;
         3238  +}
         3239  +
  3195   3240   
  3196   3241   /*
  3197   3242   ** Seek to the given offset then write cnt bytes from pBuf.
  3198   3243   ** Return the number of bytes actually written.  Update the offset.
  3199   3244   **
  3200   3245   ** To avoid stomping the errno value on a failed write the lastErrno value
  3201   3246   ** is set before returning.
  3202   3247   */
  3203   3248   static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
  3204         -  int got;
  3205         -#if (!defined(USE_PREAD) && !defined(USE_PREAD64))
  3206         -  i64 newOffset;
  3207         -#endif
  3208         -  assert( cnt==(cnt&0x1ffff) );
  3209         -  cnt &= 0x1ffff;
  3210         -  TIMER_START;
  3211         -#if defined(USE_PREAD)
  3212         -  do{ got = osPwrite(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR );
  3213         -#elif defined(USE_PREAD64)
  3214         -  do{ got = osPwrite64(id->h, pBuf, cnt, offset);}while( got<0 && errno==EINTR);
  3215         -#else
  3216         -  do{
  3217         -    newOffset = lseek(id->h, offset, SEEK_SET);
  3218         -    SimulateIOError( newOffset-- );
  3219         -    if( newOffset!=offset ){
  3220         -      if( newOffset == -1 ){
  3221         -        ((unixFile*)id)->lastErrno = errno;
  3222         -      }else{
  3223         -        ((unixFile*)id)->lastErrno = 0;
  3224         -      }
  3225         -      return -1;
  3226         -    }
  3227         -    got = osWrite(id->h, pBuf, cnt);
  3228         -  }while( got<0 && errno==EINTR );
  3229         -#endif
  3230         -  TIMER_END;
  3231         -  if( got<0 ){
  3232         -    ((unixFile*)id)->lastErrno = errno;
  3233         -  }
  3234         -
  3235         -  OSTRACE(("WRITE   %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED));
  3236         -  return got;
         3249  +  return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno);
  3237   3250   }
  3238   3251   
  3239   3252   
  3240   3253   /*
  3241   3254   ** Write data from a buffer into a file.  Return SQLITE_OK on success
  3242   3255   ** or some other error code on failure.
  3243   3256   */
................................................................................
  4318   4331           rc = SQLITE_IOERR_SHMSIZE;
  4319   4332           goto shmpage_out;
  4320   4333         }
  4321   4334     
  4322   4335         if( sStat.st_size<nByte ){
  4323   4336           /* The requested memory region does not exist. If bExtend is set to
  4324   4337           ** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
  4325         -        **
  4326         -        ** Alternatively, if bExtend is true, use ftruncate() to allocate
  4327         -        ** the requested memory region.
  4328   4338           */
  4329         -        if( !bExtend ) goto shmpage_out;
  4330         -#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
  4331         -        if( osFallocate(pShmNode->h, sStat.st_size, nByte)!=0 ){
  4332         -          rc = unixLogError(SQLITE_IOERR_SHMSIZE, "fallocate",
  4333         -                            pShmNode->zFilename);
         4339  +        if( !bExtend ){
  4334   4340             goto shmpage_out;
  4335   4341           }
  4336         -#else
  4337         -        if( robust_ftruncate(pShmNode->h, nByte) ){
  4338         -          rc = unixLogError(SQLITE_IOERR_SHMSIZE, "ftruncate",
  4339         -                            pShmNode->zFilename);
  4340         -          goto shmpage_out;
         4342  +
         4343  +        /* Alternatively, if bExtend is true, extend the file. Do this by
         4344  +        ** writing a single byte to the end of each (OS) page being
         4345  +        ** allocated or extended. Technically, we need only write to the
         4346  +        ** last page in order to extend the file. But writing to all new
         4347  +        ** pages forces the OS to allocate them immediately, which reduces
         4348  +        ** the chances of SIGBUS while accessing the mapped region later on.
         4349  +        */
         4350  +        else{
         4351  +          static const int pgsz = 4096;
         4352  +          int iPg;
         4353  +
         4354  +          /* Write to the last byte of each newly allocated or extended page */
         4355  +          assert( (nByte % pgsz)==0 );
         4356  +          for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){
         4357  +            if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, 0)!=1 ){
         4358  +              const char *zFile = pShmNode->zFilename;
         4359  +              rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile);
         4360  +              goto shmpage_out;
         4361  +            }
         4362  +          }
  4341   4363           }
  4342         -#endif
  4343   4364         }
  4344   4365       }
  4345   4366   
  4346   4367       /* Map the requested memory region into this process's address space. */
  4347   4368       apNew = (char **)sqlite3_realloc(
  4348   4369           pShmNode->apRegion, (iRegion+1)*sizeof(char *)
  4349   4370       );