diff options
author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2015-02-17 17:25:57 +0000 |
---|---|---|
committer | <> | 2015-03-17 16:26:24 +0000 |
commit | 780b92ada9afcf1d58085a83a0b9e6bc982203d1 (patch) | |
tree | 598f8b9fa431b228d29897e798de4ac0c1d3d970 /lang/sql/sqlite/src/os_unix.c | |
parent | 7a2660ba9cc2dc03a69ddfcfd95369395cc87444 (diff) | |
download | berkeleydb-master.tar.gz |
Diffstat (limited to 'lang/sql/sqlite/src/os_unix.c')
-rw-r--r-- | lang/sql/sqlite/src/os_unix.c | 1445 |
1 files changed, 1106 insertions, 339 deletions
diff --git a/lang/sql/sqlite/src/os_unix.c b/lang/sql/sqlite/src/os_unix.c index 2626ab4d..b5395502 100644 --- a/lang/sql/sqlite/src/os_unix.c +++ b/lang/sql/sqlite/src/os_unix.c @@ -119,10 +119,11 @@ #include <time.h> #include <sys/time.h> #include <errno.h> -#ifndef SQLITE_OMIT_WAL +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 #include <sys/mman.h> #endif + #if SQLITE_ENABLE_LOCKING_STYLE # include <sys/ioctl.h> # if OS_VXWORKS @@ -138,6 +139,10 @@ # include <sys/mount.h> #endif +#ifdef HAVE_UTIME +# include <utime.h> +#endif + /* ** Allowed values of unixFile.fsFlags */ @@ -160,8 +165,8 @@ #endif /* - ** Default permissions when creating auto proxy dir - */ +** Default permissions when creating auto proxy dir +*/ #ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS # define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755 #endif @@ -202,17 +207,28 @@ struct UnixUnusedFd { typedef struct unixFile unixFile; struct unixFile { sqlite3_io_methods const *pMethod; /* Always the first entry */ + sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ unixInodeInfo *pInode; /* Info about locks on this inode */ int h; /* The file descriptor */ - int dirfd; /* File descriptor for the directory */ unsigned char eFileLock; /* The type of lock held on this fd */ - unsigned char ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ + unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ int lastErrno; /* The unix errno from last I/O error */ void *lockingContext; /* Locking style specific state */ UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ const char *zPath; /* Name of the file */ unixShm *pShm; /* Shared memory segment information */ int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ +#if SQLITE_MAX_MMAP_SIZE>0 + int nFetchOut; /* Number of outstanding xFetch refs */ + sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ + sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ + sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ + void *pMapRegion; /* Memory mapped region */ +#endif +#ifdef __QNXNTO__ + int sectorSize; /* Device sector size */ + int deviceCharacteristics; /* Precomputed device characteristics */ +#endif #if SQLITE_ENABLE_LOCKING_STYLE int openFlags; /* The flags specified at open() */ #endif @@ -220,10 +236,9 @@ struct unixFile { unsigned fsFlags; /* cached details from statfs() */ #endif #if OS_VXWORKS - int isDelete; /* Delete on close if true */ struct vxworksFileId *pId; /* Unique file ID */ #endif -#ifndef NDEBUG +#ifdef SQLITE_DEBUG /* The next group of variables are used to track whether or not the ** transaction counter in bytes 24-27 of database files are updated ** whenever any part of the database changes. An assertion fault will @@ -234,7 +249,9 @@ struct unixFile { unsigned char transCntrChng; /* True if the transaction counter changed */ unsigned char dbUpdate; /* True if any part of database file changed */ unsigned char inNormalWrite; /* True if in a normal write operation */ + #endif + #ifdef SQLITE_TEST /* In test mode, increase the size of this structure a bit so that ** it is larger than the struct CrashFile defined in test6.c. @@ -243,11 +260,28 @@ struct unixFile { #endif }; +/* This variable holds the process id (pid) from when the xRandomness() +** method was called. If xOpen() is called from a different process id, +** indicating that a fork() has occurred, the PRNG will be reset. +*/ +static int randomnessPid = 0; + /* ** Allowed values for the unixFile.ctrlFlags bitmask: */ -#define UNIXFILE_EXCL 0x01 /* Connections from one process only */ -#define UNIXFILE_RDONLY 0x02 /* Connection is read only */ +#define UNIXFILE_EXCL 0x01 /* Connections from one process only */ +#define UNIXFILE_RDONLY 0x02 /* Connection is read only */ +#define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ +#ifndef SQLITE_DISABLE_DIRSYNC +# define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ +#else +# define UNIXFILE_DIRSYNC 0x00 +#endif +#define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ +#define UNIXFILE_DELETE 0x20 /* Delete on close */ +#define UNIXFILE_URI 0x40 /* Filename might have query parameters */ +#define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ +#define UNIXFILE_WARNED 0x0100 /* verifyDbFile() warnings have been issued */ /* ** Include code that is common to all os_*.c files @@ -282,18 +316,53 @@ struct unixFile { #endif /* +** HAVE_MREMAP defaults to true on Linux and false everywhere else. +*/ +#if !defined(HAVE_MREMAP) +# if defined(__linux__) && defined(_GNU_SOURCE) +# define HAVE_MREMAP 1 +# else +# define HAVE_MREMAP 0 +# endif +#endif + +/* +** Different Unix systems declare open() in different ways. Same use +** open(const char*,int,mode_t). Others use open(const char*,int,...). +** The difference is important when using a pointer to the function. +** +** The safest way to deal with the problem is to always use this wrapper +** which always has the same well-defined interface. +*/ +static int posixOpen(const char *zFile, int flags, int mode){ + return open(zFile, flags, mode); +} + +/* +** On some systems, calls to fchown() will trigger a message in a security +** log if they come from non-root processes. So avoid calling fchown() if +** we are not running as root. +*/ +static int posixFchown(int fd, uid_t uid, gid_t gid){ + return geteuid() ? 0 : fchown(fd,uid,gid); +} + +/* Forward reference */ +static int openDirectory(const char*, int*); + +/* ** Many system calls are accessed through pointer-to-functions so that ** they may be overridden at runtime to facilitate fault injection during ** testing and sandboxing. The following array holds the names and pointers ** to all overrideable system calls. */ static struct unix_syscall { - const char *zName; /* Name of the sytem call */ + const char *zName; /* Name of the system call */ sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ sqlite3_syscall_ptr pDefault; /* Default value */ } aSyscall[] = { - { "open", (sqlite3_syscall_ptr)open, 0 }, -#define osOpen ((int(*)(const char*,int,...))aSyscall[0].pCurrent) + { "open", (sqlite3_syscall_ptr)posixOpen, 0 }, +#define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent) { "close", (sqlite3_syscall_ptr)close, 0 }, #define osClose ((int(*)(int))aSyscall[1].pCurrent) @@ -330,7 +399,7 @@ static struct unix_syscall { { "read", (sqlite3_syscall_ptr)read, 0 }, #define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent) -#if defined(USE_PREAD) || defined(SQLITE_ENABLE_LOCKING_STYLE) +#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE { "pread", (sqlite3_syscall_ptr)pread, 0 }, #else { "pread", (sqlite3_syscall_ptr)0, 0 }, @@ -347,7 +416,7 @@ static struct unix_syscall { { "write", (sqlite3_syscall_ptr)write, 0 }, #define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent) -#if defined(USE_PREAD) || defined(SQLITE_ENABLE_LOCKING_STYLE) +#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE { "pwrite", (sqlite3_syscall_ptr)pwrite, 0 }, #else { "pwrite", (sqlite3_syscall_ptr)0, 0 }, @@ -363,11 +432,7 @@ static struct unix_syscall { #define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off_t))\ aSyscall[13].pCurrent) -#if SQLITE_ENABLE_LOCKING_STYLE { "fchmod", (sqlite3_syscall_ptr)fchmod, 0 }, -#else - { "fchmod", (sqlite3_syscall_ptr)0, 0 }, -#endif #define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent) #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE @@ -377,6 +442,36 @@ static struct unix_syscall { #endif #define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent) + { "unlink", (sqlite3_syscall_ptr)unlink, 0 }, +#define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent) + + { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 }, +#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) + + { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 }, +#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) + + { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, +#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) + + { "fchown", (sqlite3_syscall_ptr)posixFchown, 0 }, +#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) + +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 + { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, +#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent) + + { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, +#define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent) + +#if HAVE_MREMAP + { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, +#else + { "mremap", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent) +#endif + }; /* End of the overrideable system calls */ /* @@ -463,12 +558,66 @@ static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){ } /* -** Retry open() calls that fail due to EINTR +** Do not accept any file descriptor less than this value, in order to avoid +** opening database file using file descriptors that are commonly used for +** standard input, output, and error. */ -static int robust_open(const char *z, int f, int m){ - int rc; - do{ rc = osOpen(z,f,m); }while( rc<0 && errno==EINTR ); - return rc; +#ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR +# define SQLITE_MINIMUM_FILE_DESCRIPTOR 3 +#endif + +/* +** Invoke open(). Do so multiple times, until it either succeeds or +** fails for some reason other than EINTR. +** +** If the file creation mode "m" is 0 then set it to the default for +** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally +** 0644) as modified by the system umask. If m is not 0, then +** make the file creation mode be exactly m ignoring the umask. +** +** The m parameter will be non-zero only when creating -wal, -journal, +** and -shm files. We want those files to have *exactly* the same +** permissions as their original database, unadulterated by the umask. +** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a +** transaction crashes and leaves behind hot journals, then any +** process that is able to write to the database will also be able to +** recover the hot journals. +*/ +static int robust_open(const char *z, int f, mode_t m){ + int fd; + mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS; + while(1){ +#if defined(O_CLOEXEC) + fd = osOpen(z,f|O_CLOEXEC,m2); +#else + fd = osOpen(z,f,m2); +#endif + if( fd<0 ){ + if( errno==EINTR ) continue; + break; + } + if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break; + osClose(fd); + sqlite3_log(SQLITE_WARNING, + "attempt to open \"%s\" as file descriptor %d", z, fd); + fd = -1; + if( osOpen("/dev/null", f, m)<0 ) break; + } + if( fd>=0 ){ + if( m!=0 ){ + struct stat statbuf; + if( osFstat(fd, &statbuf)==0 + && statbuf.st_size==0 + && (statbuf.st_mode&0777)!=m + ){ + osFchmod(fd, m); + } + } +#if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0) + osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); +#endif + } + return fd; } /* @@ -498,7 +647,7 @@ static int unixMutexHeld(void) { #endif -#ifdef SQLITE_DEBUG +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) /* ** Helper function for printing out trace information from debugging ** binaries. This returns the string represetation of the supplied @@ -625,9 +774,9 @@ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { case EACCES: /* EACCES is like EAGAIN during locking operations, but not any other time*/ if( (sqliteIOErr == SQLITE_IOERR_LOCK) || - (sqliteIOErr == SQLITE_IOERR_UNLOCK) || - (sqliteIOErr == SQLITE_IOERR_RDLOCK) || - (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){ + (sqliteIOErr == SQLITE_IOERR_UNLOCK) || + (sqliteIOErr == SQLITE_IOERR_RDLOCK) || + (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){ return SQLITE_BUSY; } /* else fall through */ @@ -661,7 +810,9 @@ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { case ENODEV: case ENXIO: case ENOENT: +#ifdef ESTALE /* ESTALE is not defined on Interix systems */ case ESTALE: +#endif case ENOSYS: /* these should force the client to close the file and reconnect */ @@ -671,7 +822,6 @@ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { } - /****************************************************************************** ****************** Begin Unique File ID Utility Used By VxWorks *************** ** @@ -933,7 +1083,7 @@ struct unixInodeInfo { UnixUnusedFd *pUnused; /* Unused file descriptors to close */ unixInodeInfo *pNext; /* List of all unixInodeInfo objects */ unixInodeInfo *pPrev; /* .... doubly linked */ -#if defined(SQLITE_ENABLE_LOCKING_STYLE) +#if SQLITE_ENABLE_LOCKING_STYLE unsigned long long sharedByte; /* for AFP simulated shared lock */ #endif #if OS_VXWORKS @@ -960,7 +1110,7 @@ static unixInodeInfo *inodeList = 0; ** The first argument passed to the macro should be the error code that ** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). ** The two subsequent arguments should be the name of the OS function that -** failed (e.g. "unlink", "open") and the the associated file-system path, +** failed (e.g. "unlink", "open") and the associated file-system path, ** if any. */ #define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__) @@ -983,7 +1133,7 @@ static int unixLogErrorAtLine( zErr = aErr; /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined, - ** assume that the system provides the the GNU version of strerror_r() that + ** assume that the system provides the GNU version of strerror_r() that ** returns a pointer to a buffer containing the error message. That pointer ** may point to aErr[], or it may point to some static storage somewhere. ** Otherwise, assume that the system provides the POSIX version of @@ -1007,7 +1157,6 @@ static int unixLogErrorAtLine( zErr = strerror(iErrno); #endif - assert( errcode!=SQLITE_OK ); if( zPath==0 ) zPath = ""; sqlite3_log(errcode, "os_unix.c:%d: (%d) %s(%s) - %s", @@ -1172,6 +1321,56 @@ static int findInodeInfo( return SQLITE_OK; } +/* +** Return TRUE if pFile has been renamed or unlinked since it was first opened. +*/ +static int fileHasMoved(unixFile *pFile){ + struct stat buf; + return pFile->pInode!=0 && + (osStat(pFile->zPath, &buf)!=0 || buf.st_ino!=pFile->pInode->fileId.ino); +} + + +/* +** Check a unixFile that is a database. Verify the following: +** +** (1) There is exactly one hard link on the file +** (2) The file is not a symbolic link +** (3) The file has not been renamed or unlinked +** +** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right. +*/ +static void verifyDbFile(unixFile *pFile){ + struct stat buf; + int rc; + if( pFile->ctrlFlags & UNIXFILE_WARNED ){ + /* One or more of the following warnings have already been issued. Do not + ** repeat them so as not to clutter the error log */ + return; + } + rc = osFstat(pFile->h, &buf); + if( rc!=0 ){ + sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){ + sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( buf.st_nlink>1 ){ + sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( fileHasMoved(pFile) ){ + sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } +} + /* ** This routine checks if there is a RESERVED lock held on the specified @@ -1331,14 +1530,14 @@ static int unixLock(sqlite3_file *id, int eFileLock){ */ int rc = SQLITE_OK; unixFile *pFile = (unixFile*)id; - unixInodeInfo *pInode = pFile->pInode; + unixInodeInfo *pInode; struct flock lock; int tErrno = 0; assert( pFile ); OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h, azFileLock(eFileLock), azFileLock(pFile->eFileLock), - azFileLock(pInode->eFileLock), pInode->nShared , getpid())); + azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared , getpid())); /* If there is already a lock of this type or more restrictive on the ** unixFile, do nothing. Don't use the end_lock: exit path, as @@ -1479,7 +1678,7 @@ static int unixLock(sqlite3_file *id, int eFileLock){ } -#ifndef NDEBUG +#ifdef SQLITE_DEBUG /* Set up the transaction-counter change checking flags when ** transitioning from a SHARED to a RESERVED lock. The change ** from SHARED to RESERVED marks the beginning of a normal @@ -1542,7 +1741,6 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ unixInodeInfo *pInode; struct flock lock; int rc = SQLITE_OK; - int h; assert( pFile ); OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock, @@ -1554,16 +1752,12 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ return SQLITE_OK; } unixEnterMutex(); - h = pFile->h; pInode = pFile->pInode; assert( pInode->nShared!=0 ); if( pFile->eFileLock>SHARED_LOCK ){ assert( pInode->eFileLock==pFile->eFileLock ); - SimulateIOErrorBenign(1); - SimulateIOError( h=(-1) ) - SimulateIOErrorBenign(0); -#ifndef NDEBUG +#ifdef SQLITE_DEBUG /* When reducing a lock such that other processes can start ** reading the database file again, make sure that the ** transaction counter was updated if any part of the database @@ -1572,11 +1766,6 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ ** the file has changed and hence might not know to flush their ** cache. The use of a stale cache can lead to database corruption. */ -#if 0 - assert( pFile->inNormalWrite==0 - || pFile->dbUpdate==0 - || pFile->transCntrChng==1 ); -#endif pFile->inNormalWrite = 0; #endif @@ -1678,14 +1867,11 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = lock.l_len = 0L; - SimulateIOErrorBenign(1); - SimulateIOError( h=(-1) ) - SimulateIOErrorBenign(0); if( unixFileLock(pFile, &lock)==0 ){ pInode->eFileLock = NO_LOCK; }else{ rc = SQLITE_IOERR_UNLOCK; - pFile->lastErrno = errno; + pFile->lastErrno = errno; pInode->eFileLock = NO_LOCK; pFile->eFileLock = NO_LOCK; } @@ -1701,7 +1887,7 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ closePendingFds(pFile); } } - + end_unlock: unixLeaveMutex(); if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; @@ -1716,9 +1902,17 @@ end_unlock: ** the requested locking level, this routine is a no-op. */ static int unixUnlock(sqlite3_file *id, int eFileLock){ +#if SQLITE_MAX_MMAP_SIZE>0 + assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); +#endif return posixUnlock(id, eFileLock, 0); } +#if SQLITE_MAX_MMAP_SIZE>0 +static int unixMapfile(unixFile *pFd, i64 nByte); +static void unixUnmapfile(unixFile *pFd); +#endif + /* ** This function performs the parts of the "close file" operation ** common to all locking schemes. It closes the directory and file @@ -1731,18 +1925,17 @@ static int unixUnlock(sqlite3_file *id, int eFileLock){ */ static int closeUnixFile(sqlite3_file *id){ unixFile *pFile = (unixFile*)id; - if( pFile->dirfd>=0 ){ - robust_close(pFile, pFile->dirfd, __LINE__); - pFile->dirfd=-1; - } +#if SQLITE_MAX_MMAP_SIZE>0 + unixUnmapfile(pFile); +#endif if( pFile->h>=0 ){ robust_close(pFile, pFile->h, __LINE__); pFile->h = -1; } #if OS_VXWORKS if( pFile->pId ){ - if( pFile->isDelete ){ - unlink(pFile->pId->zCanonicalName); + if( pFile->ctrlFlags & UNIXFILE_DELETE ){ + osUnlink(pFile->pId->zCanonicalName); } vxworksReleaseFileId(pFile->pId); pFile->pId = 0; @@ -1761,6 +1954,7 @@ static int closeUnixFile(sqlite3_file *id){ static int unixClose(sqlite3_file *id){ int rc = SQLITE_OK; unixFile *pFile = (unixFile *)id; + verifyDbFile(pFile); unixUnlock(id, NO_LOCK); unixEnterMutex(); @@ -1829,9 +2023,9 @@ static int nolockClose(sqlite3_file *id) { /****************************************************************************** ************************* Begin dot-file Locking ****************************** ** -** The dotfile locking implementation uses the existance of separate lock -** files in order to control access to the database. This works on just -** about every filesystem imaginable. But there are serious downsides: +** The dotfile locking implementation uses the existence of separate lock +** files (really a directory) to control access to the database. This works +** on just about every filesystem imaginable. But there are serious downsides: ** ** (1) There is zero concurrency. A single reader blocks all other ** connections from reading or writing the database. @@ -1842,15 +2036,15 @@ static int nolockClose(sqlite3_file *id) { ** Nevertheless, a dotlock is an appropriate locking mode for use if no ** other locking strategy is available. ** -** Dotfile locking works by creating a file in the same directory as the -** database and with the same name but with a ".lock" extension added. -** The existance of a lock file implies an EXCLUSIVE lock. All other lock -** types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. +** Dotfile locking works by creating a subdirectory in the same directory as +** the database and with the same name but with a ".lock" extension added. +** The existence of a lock directory implies an EXCLUSIVE lock. All other +** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. */ /* ** The file suffix added to the data base filename in order to create the -** lock file. +** lock directory. */ #define DOTLOCK_SUFFIX ".lock" @@ -1917,7 +2111,6 @@ static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { */ static int dotlockLock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; - int fd; char *zLockFile = (char *)pFile->lockingContext; int rc = SQLITE_OK; @@ -1927,17 +2120,19 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) { */ if( pFile->eFileLock > NO_LOCK ){ pFile->eFileLock = eFileLock; -#if !OS_VXWORKS /* Always update the timestamp on the old file */ +#ifdef HAVE_UTIME + utime(zLockFile, NULL); +#else utimes(zLockFile, NULL); #endif return SQLITE_OK; } /* grab an exclusive lock */ - fd = robust_open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600); - if( fd<0 ){ - /* failed to open/create the file, someone else may have stolen the lock */ + rc = osMkdir(zLockFile, 0777); + if( rc<0 ){ + /* failed to open/create the lock directory */ int tErrno = errno; if( EEXIST == tErrno ){ rc = SQLITE_BUSY; @@ -1949,7 +2144,6 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) { } return rc; } - robust_close(pFile, fd, __LINE__); /* got it, set the type and return ok */ pFile->eFileLock = eFileLock; @@ -1968,10 +2162,11 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) { static int dotlockUnlock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; char *zLockFile = (char *)pFile->lockingContext; + int rc; assert( pFile ); OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, - pFile->eFileLock, getpid())); + pFile->eFileLock, getpid())); assert( eFileLock<=SHARED_LOCK ); /* no-op if possible */ @@ -1989,9 +2184,11 @@ static int dotlockUnlock(sqlite3_file *id, int eFileLock) { /* To fully unlock the database, delete the lock file */ assert( eFileLock==NO_LOCK ); - if( unlink(zLockFile) ){ - int rc = 0; + rc = osRmdir(zLockFile); + if( rc<0 && errno==ENOTDIR ) rc = osUnlink(zLockFile); + if( rc<0 ){ int tErrno = errno; + rc = 0; if( ENOENT != tErrno ){ rc = SQLITE_IOERR_UNLOCK; } @@ -2008,13 +2205,13 @@ static int dotlockUnlock(sqlite3_file *id, int eFileLock) { ** Close a file. Make sure the lock has been released before closing. */ static int dotlockClose(sqlite3_file *id) { - int rc; + int rc = SQLITE_OK; if( id ){ unixFile *pFile = (unixFile*)id; dotlockUnlock(id, NO_LOCK); sqlite3_free(pFile->lockingContext); + rc = closeUnixFile(id); } - rc = closeUnixFile(id); return rc; } /****************** End of the dot-file lock implementation ******************* @@ -2218,10 +2415,12 @@ static int flockUnlock(sqlite3_file *id, int eFileLock) { ** Close a file. */ static int flockClose(sqlite3_file *id) { + int rc = SQLITE_OK; if( id ){ flockUnlock(id, NO_LOCK); + rc = closeUnixFile(id); } - return closeUnixFile(id); + return rc; } #endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */ @@ -2356,7 +2555,7 @@ static int semUnlock(sqlite3_file *id, int eFileLock) { assert( pFile ); assert( pSem ); OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock, - pFile->eFileLock, getpid())); + pFile->eFileLock, getpid())); assert( eFileLock<=SHARED_LOCK ); /* no-op if possible */ @@ -2495,11 +2694,12 @@ static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ int rc = SQLITE_OK; int reserved = 0; unixFile *pFile = (unixFile*)id; + afpLockingContext *context; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); assert( pFile ); - afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; + context = (afpLockingContext *) pFile->lockingContext; if( context->reserved ){ *pResOut = 1; return SQLITE_OK; @@ -2639,7 +2839,7 @@ static int afpLock(sqlite3_file *id, int eFileLock){ ** operating system calls for the specified lock. */ if( eFileLock==SHARED_LOCK ){ - int lrc1, lrc2, lrc1Errno; + int lrc1, lrc2, lrc1Errno = 0; long lk, mask; assert( pInode->nShared==0 ); @@ -2770,7 +2970,7 @@ static int afpUnlock(sqlite3_file *id, int eFileLock) { SimulateIOError( h=(-1) ) SimulateIOErrorBenign(0); -#ifndef NDEBUG +#ifdef SQLITE_DEBUG /* When reducing a lock such that other processes can start ** reading the database file again, make sure that the ** transaction counter was updated if any part of the database @@ -2926,35 +3126,51 @@ static int nfsUnlock(sqlite3_file *id, int eFileLock){ */ static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ int got; + int prior = 0; #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) i64 newOffset; #endif TIMER_START; + assert( cnt==(cnt&0x1ffff) ); + assert( id->h>2 ); + cnt &= 0x1ffff; + do{ #if defined(USE_PREAD) - do{ got = osPread(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); - SimulateIOError( got = -1 ); + got = osPread(id->h, pBuf, cnt, offset); + SimulateIOError( got = -1 ); #elif defined(USE_PREAD64) - do{ got = osPread64(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR); - SimulateIOError( got = -1 ); + got = osPread64(id->h, pBuf, cnt, offset); + SimulateIOError( got = -1 ); #else - newOffset = lseek(id->h, offset, SEEK_SET); - SimulateIOError( newOffset-- ); - if( newOffset!=offset ){ - if( newOffset == -1 ){ - ((unixFile*)id)->lastErrno = errno; - }else{ - ((unixFile*)id)->lastErrno = 0; + newOffset = lseek(id->h, offset, SEEK_SET); + SimulateIOError( newOffset-- ); + if( newOffset!=offset ){ + if( newOffset == -1 ){ + ((unixFile*)id)->lastErrno = errno; + }else{ + ((unixFile*)id)->lastErrno = 0; + } + return -1; } - return -1; - } - do{ got = osRead(id->h, pBuf, cnt); }while( got<0 && errno==EINTR ); + got = osRead(id->h, pBuf, cnt); #endif + if( got==cnt ) break; + if( got<0 ){ + if( errno==EINTR ){ got = 1; continue; } + prior = 0; + ((unixFile*)id)->lastErrno = errno; + break; + }else if( got>0 ){ + cnt -= got; + offset += got; + prior += got; + pBuf = (void*)(got + (char*)pBuf); + } + }while( got>0 ); TIMER_END; - if( got<0 ){ - ((unixFile*)id)->lastErrno = errno; - } - OSTRACE(("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED)); - return got; + OSTRACE(("READ %-3d %5d %7lld %llu\n", + id->h, got+prior, offset-prior, TIMER_ELAPSED)); + return got+prior; } /* @@ -2971,6 +3187,8 @@ static int unixRead( unixFile *pFile = (unixFile *)id; int got; assert( id ); + assert( offset>=0 ); + assert( amt>0 ); /* If this is a database file (not a journal, master-journal or temp ** file), the bytes in the locking range should never be read or written. */ @@ -2981,6 +3199,23 @@ static int unixRead( ); #endif +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this read request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offset<pFile->mmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + got = seekAndRead(pFile, offset, pBuf, amt); if( got==amt ){ return SQLITE_OK; @@ -2996,42 +3231,60 @@ static int unixRead( } /* -** Seek to the offset in id->offset then read cnt bytes into pBuf. -** Return the number of bytes actually read. Update the offset. -** -** To avoid stomping the errno value on a failed write the lastErrno value -** is set before returning. +** Attempt to seek the file-descriptor passed as the first argument to +** absolute offset iOff, then attempt to write nBuf bytes of data from +** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, +** return the actual number of bytes written (which may be less than +** nBuf). */ -static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ - int got; -#if (!defined(USE_PREAD) && !defined(USE_PREAD64)) - i64 newOffset; -#endif +static int seekAndWriteFd( + int fd, /* File descriptor to write to */ + i64 iOff, /* File offset to begin writing at */ + const void *pBuf, /* Copy data from this buffer to the file */ + int nBuf, /* Size of buffer pBuf in bytes */ + int *piErrno /* OUT: Error number if error occurs */ +){ + int rc = 0; /* Value returned by system call */ + + assert( nBuf==(nBuf&0x1ffff) ); + assert( fd>2 ); + nBuf &= 0x1ffff; TIMER_START; + #if defined(USE_PREAD) - do{ got = osPwrite(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); + do{ rc = osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); #elif defined(USE_PREAD64) - do{ got = osPwrite64(id->h, pBuf, cnt, offset);}while( got<0 && errno==EINTR); + do{ rc = osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); #else - newOffset = lseek(id->h, offset, SEEK_SET); - SimulateIOError( newOffset-- ); - if( newOffset!=offset ){ - if( newOffset == -1 ){ - ((unixFile*)id)->lastErrno = errno; - }else{ - ((unixFile*)id)->lastErrno = 0; + do{ + i64 iSeek = lseek(fd, iOff, SEEK_SET); + SimulateIOError( iSeek-- ); + + if( iSeek!=iOff ){ + if( piErrno ) *piErrno = (iSeek==-1 ? errno : 0); + return -1; } - return -1; - } - do{ got = osWrite(id->h, pBuf, cnt); }while( got<0 && errno==EINTR ); + rc = osWrite(fd, pBuf, nBuf); + }while( rc<0 && errno==EINTR ); #endif + TIMER_END; - if( got<0 ){ - ((unixFile*)id)->lastErrno = errno; - } + OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED)); - OSTRACE(("WRITE %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED)); - return got; + if( rc<0 && piErrno ) *piErrno = errno; + return rc; +} + + +/* +** Seek to the offset in id->offset then read cnt bytes into pBuf. +** Return the number of bytes actually read. Update the offset. +** +** To avoid stomping the errno value on a failed write the lastErrno value +** is set before returning. +*/ +static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ + return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); } @@ -3059,7 +3312,7 @@ static int unixWrite( ); #endif -#ifndef NDEBUG +#ifdef SQLITE_DEBUG /* If we are doing a normal write to a database file (as opposed to ** doing a hot-journal rollback or a write to some file other than a ** normal database file) then record the fact that the database @@ -3081,6 +3334,23 @@ static int unixWrite( } #endif +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this write request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offset<pFile->mmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){ amt -= wrote; offset += wrote; @@ -3090,7 +3360,7 @@ static int unixWrite( SimulateDiskfullError(( wrote=0, amt=1 )); if( amt>0 ){ - if( wrote<0 ){ + if( wrote<0 && pFile->lastErrno!=ENOSPC ){ /* lastErrno set by seekAndWrite */ return SQLITE_IOERR_WRITE; }else{ @@ -3113,11 +3383,11 @@ int sqlite3_fullsync_count = 0; /* ** We do not trust systems to provide a working fdatasync(). Some do. -** Others do no. To be safe, we will stick with the (slower) fsync(). -** If you know that your system does support fdatasync() correctly, +** Others do no. To be safe, we will stick with the (slightly slower) +** fsync(). If you know that your system does support fdatasync() correctly, ** then simply compile with -Dfdatasync=fdatasync */ -#if !defined(fdatasync) && !defined(__linux__) +#if !defined(fdatasync) # define fdatasync fsync #endif @@ -3226,6 +3496,47 @@ static int full_fsync(int fd, int fullSync, int dataOnly){ } /* +** Open a file descriptor to the directory containing file zFilename. +** If successful, *pFd is set to the opened file descriptor and +** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM +** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined +** value. +** +** The directory file descriptor is used for only one thing - to +** fsync() a directory to make sure file creation and deletion events +** are flushed to disk. Such fsyncs are not needed on newer +** journaling filesystems, but are required on older filesystems. +** +** This routine can be overridden using the xSetSysCall interface. +** The ability to override this routine was added in support of the +** chromium sandbox. Opening a directory is a security risk (we are +** told) so making it overrideable allows the chromium sandbox to +** replace this routine with a harmless no-op. To make this routine +** a no-op, replace it with a stub that returns SQLITE_OK but leaves +** *pFd set to a negative number. +** +** If SQLITE_OK is returned, the caller is responsible for closing +** the file descriptor *pFd using close(). +*/ +static int openDirectory(const char *zFilename, int *pFd){ + int ii; + int fd = -1; + char zDirname[MAX_PATHNAME+1]; + + sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); + for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--); + if( ii>0 ){ + zDirname[ii] = '\0'; + fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); + if( fd>=0 ){ + OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); + } + } + *pFd = fd; + return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname)); +} + +/* ** Make sure all writes to a particular file are committed to disk. ** ** If dataOnly==0 then both the file itself and its metadata (file @@ -3265,28 +3576,23 @@ static int unixSync(sqlite3_file *id, int flags){ pFile->lastErrno = errno; return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); } - if( pFile->dirfd>=0 ){ - OSTRACE(("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd, + + /* Also fsync the directory containing the file if the DIRSYNC flag + ** is set. This is a one-time occurrence. Many systems (examples: AIX) + ** are unable to fsync a directory, so ignore errors on the fsync. + */ + if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){ + int dirfd; + OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath, HAVE_FULLFSYNC, isFullsync)); -#ifndef SQLITE_DISABLE_DIRSYNC - /* The directory sync is only attempted if full_fsync is - ** turned off or unavailable. If a full_fsync occurred above, - ** then the directory sync is superfluous. - */ - if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){ - /* - ** We have received multiple reports of fsync() returning - ** errors when applied to directories on certain file systems. - ** A failed directory sync is not a big deal. So it seems - ** better to ignore the error. Ticket #1657 - */ - /* pFile->lastErrno = errno; */ - /* return SQLITE_IOERR; */ + rc = osOpenDirectory(pFile->zPath, &dirfd); + if( rc==SQLITE_OK && dirfd>=0 ){ + full_fsync(dirfd, 0, 0); + robust_close(pFile, dirfd, __LINE__); + }else if( rc==SQLITE_CANTOPEN ){ + rc = SQLITE_OK; } -#endif - /* Only need to sync once, so close the directory when we are done */ - robust_close(pFile, pFile->dirfd, __LINE__); - pFile->dirfd = -1; + pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC; } return rc; } @@ -3305,7 +3611,7 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){ ** actual file size after the operation may be larger than the requested ** size). */ - if( pFile->szChunk ){ + if( pFile->szChunk>0 ){ nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; } @@ -3314,7 +3620,7 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){ pFile->lastErrno = errno; return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); }else{ -#ifndef NDEBUG +#ifdef SQLITE_DEBUG /* If we are doing a normal write to a database file (as opposed to ** doing a hot-journal rollback or a write to some file other than a ** normal database file) and we truncate the file to zero length, @@ -3327,6 +3633,16 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){ } #endif +#if SQLITE_MAX_MMAP_SIZE>0 + /* If the file was just truncated to a size smaller than the currently + ** mapped region, reduce the effective mapping size as well. SQLite will + ** use read() and write() to access data beyond this point from now on. + */ + if( nByte<pFile->mmapSize ){ + pFile->mmapSize = nByte; + } +#endif + return SQLITE_OK; } } @@ -3368,14 +3684,12 @@ static int proxyFileControl(sqlite3_file*,int,void*); /* ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT -** file-control operation. -** -** If the user has configured a chunk-size for this file, it could be -** that the file needs to be extended at this point. Otherwise, the -** SQLITE_FCNTL_SIZE_HINT operation is a no-op for Unix. +** file-control operation. Enlarge the database to nBytes in size +** (rounded up to the next chunk-size). If the database is already +** nBytes or larger, this routine is a no-op. */ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ - if( pFile->szChunk ){ + if( pFile->szChunk>0 ){ i64 nSize; /* Required file size */ struct stat buf; /* Used to hold return values of fstat() */ @@ -3417,30 +3731,111 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ } } +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ + int rc; + if( pFile->szChunk<=0 ){ + if( robust_ftruncate(pFile->h, nByte) ){ + pFile->lastErrno = errno; + return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); + } + } + + rc = unixMapfile(pFile, nByte); + return rc; + } +#endif + return SQLITE_OK; } /* +** If *pArg is inititially negative then this is a query. Set *pArg to +** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. +** +** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. +*/ +static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ + if( *pArg<0 ){ + *pArg = (pFile->ctrlFlags & mask)!=0; + }else if( (*pArg)==0 ){ + pFile->ctrlFlags &= ~mask; + }else{ + pFile->ctrlFlags |= mask; + } +} + +/* Forward declaration */ +static int unixGetTempname(int nBuf, char *zBuf); + +/* ** Information and control of an open file handle. */ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ + unixFile *pFile = (unixFile*)id; switch( op ){ case SQLITE_FCNTL_LOCKSTATE: { - *(int*)pArg = ((unixFile*)id)->eFileLock; + *(int*)pArg = pFile->eFileLock; return SQLITE_OK; } case SQLITE_LAST_ERRNO: { - *(int*)pArg = ((unixFile*)id)->lastErrno; + *(int*)pArg = pFile->lastErrno; return SQLITE_OK; } case SQLITE_FCNTL_CHUNK_SIZE: { - ((unixFile*)id)->szChunk = *(int *)pArg; + pFile->szChunk = *(int *)pArg; return SQLITE_OK; } case SQLITE_FCNTL_SIZE_HINT: { - return fcntlSizeHint((unixFile *)id, *(i64 *)pArg); + int rc; + SimulateIOErrorBenign(1); + rc = fcntlSizeHint(pFile, *(i64 *)pArg); + SimulateIOErrorBenign(0); + return rc; + } + case SQLITE_FCNTL_PERSIST_WAL: { + unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { + unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_VFSNAME: { + *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); + return SQLITE_OK; + } + case SQLITE_FCNTL_TEMPFILENAME: { + char *zTFile = sqlite3_malloc( pFile->pVfs->mxPathname ); + if( zTFile ){ + unixGetTempname(pFile->pVfs->mxPathname, zTFile); + *(char**)pArg = zTFile; + } + return SQLITE_OK; } -#ifndef NDEBUG + case SQLITE_FCNTL_HAS_MOVED: { + *(int*)pArg = fileHasMoved(pFile); + return SQLITE_OK; + } +#if SQLITE_MAX_MMAP_SIZE>0 + case SQLITE_FCNTL_MMAP_SIZE: { + i64 newLimit = *(i64*)pArg; + int rc = SQLITE_OK; + if( newLimit>sqlite3GlobalConfig.mxMmap ){ + newLimit = sqlite3GlobalConfig.mxMmap; + } + *(i64*)pArg = pFile->mmapSizeMax; + if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ + pFile->mmapSizeMax = newLimit; + if( pFile->mmapSize>0 ){ + unixUnmapfile(pFile); + rc = unixMapfile(pFile, -1); + } + } + return rc; + } +#endif +#ifdef SQLITE_DEBUG /* The pager calls this method to signal that it has done ** a rollback and that the database is therefore unchanged and ** it hence it is OK for the transaction change counter to be @@ -3457,9 +3852,6 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return proxyFileControl(id,op,pArg); } #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ - case SQLITE_FCNTL_SYNC_OMITTED: { - return SQLITE_OK; /* A no-op */ - } } return SQLITE_NOTFOUND; } @@ -3474,17 +3866,117 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ ** a database and its journal file) that the sector size will be the ** same for both. */ +#ifndef __QNXNTO__ static int unixSectorSize(sqlite3_file *NotUsed){ UNUSED_PARAMETER(NotUsed); return SQLITE_DEFAULT_SECTOR_SIZE; } +#endif /* -** Return the device characteristics for the file. This is always 0 for unix. +** The following version of unixSectorSize() is optimized for QNX. */ -static int unixDeviceCharacteristics(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); - return 0; +#ifdef __QNXNTO__ +#include <sys/dcmd_blk.h> +#include <sys/statvfs.h> +static int unixSectorSize(sqlite3_file *id){ + unixFile *pFile = (unixFile*)id; + if( pFile->sectorSize == 0 ){ + struct statvfs fsInfo; + + /* Set defaults for non-supported filesystems */ + pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; + pFile->deviceCharacteristics = 0; + if( fstatvfs(pFile->h, &fsInfo) == -1 ) { + return pFile->sectorSize; + } + + if( !strcmp(fsInfo.f_basetype, "tmp") ) { + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */ + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( strstr(fsInfo.f_basetype, "etfs") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + /* etfs cluster size writes are atomic */ + (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) | + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */ + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + /* full bitset of atomics from max sector size and smaller */ + ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( strstr(fsInfo.f_basetype, "dos") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + /* full bitset of atomics from max sector size and smaller */ + ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else{ + pFile->deviceCharacteristics = + SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */ + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + 0; + } + } + /* Last chance verification. If the sector size isn't a multiple of 512 + ** then it isn't valid.*/ + if( pFile->sectorSize % 512 != 0 ){ + pFile->deviceCharacteristics = 0; + pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; + } + return pFile->sectorSize; +} +#endif /* __QNXNTO__ */ + +/* +** Return the device characteristics for the file. +** +** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. +** However, that choice is contraversial since technically the underlying +** file system does not always provide powersafe overwrites. (In other +** words, after a power-loss event, parts of the file that were never +** written might end up being altered.) However, non-PSOW behavior is very, +** very rare. And asserting PSOW makes a large reduction in the amount +** of required I/O for journaling, since a lot of padding is eliminated. +** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control +** available to turn it off and URI query parameter available to turn it off. +*/ +static int unixDeviceCharacteristics(sqlite3_file *id){ + unixFile *p = (unixFile*)id; + int rc = 0; +#ifdef __QNXNTO__ + if( p->sectorSize==0 ) unixSectorSize(id); + rc = p->deviceCharacteristics; +#endif + if( p->ctrlFlags & UNIXFILE_PSOW ){ + rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; + } + return rc; } #ifndef SQLITE_OMIT_WAL @@ -3525,7 +4017,8 @@ struct unixShmNode { char *zFilename; /* Name of the mmapped file */ int h; /* Open file descriptor */ int szRegion; /* Size of shared-memory regions */ - int nRegion; /* Size of array apRegion */ + u16 nRegion; /* Size of array apRegion */ + u8 isReadonly; /* True if read-only */ char **apRegion; /* Array of mapped shared-memory regions */ int nRef; /* Number of unixShm objects pointing to this */ unixShm *pFirst; /* All unixShm objects pointing to this */ @@ -3553,11 +4046,9 @@ struct unixShm { unixShmNode *pShmNode; /* The underlying unixShmNode object */ unixShm *pNext; /* Next unixShm with the same unixShmNode */ u8 hasMutex; /* True if holding the unixShmNode mutex */ + u8 id; /* Id of this connection within its unixShmNode */ u16 sharedMask; /* Mask of shared locks held */ u16 exclMask; /* Mask of exclusive locks held */ -#ifdef SQLITE_DEBUG - u8 id; /* Id of this connection within its unixShmNode */ -#endif }; /* @@ -3606,7 +4097,7 @@ static int unixShmSystemLock( #ifdef SQLITE_DEBUG { u16 mask; OSTRACE(("SHM-LOCK ")); - mask = (1<<(ofst+n)) - (1<<ofst); + mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst); if( rc==SQLITE_OK ){ if( lockType==F_UNLCK ){ OSTRACE(("unlock %d ok", ofst)); @@ -3653,10 +4144,10 @@ static void unixShmPurge(unixFile *pFd){ if( p && p->nRef==0 ){ int i; assert( p->pInode==pFd->pInode ); - if( p->mutex ) sqlite3_mutex_free(p->mutex); + sqlite3_mutex_free(p->mutex); for(i=0; i<p->nRegion; i++){ if( p->h>=0 ){ - munmap(p->apRegion[i], p->szRegion); + osMunmap(p->apRegion[i], p->szRegion); }else{ sqlite3_free(p->apRegion[i]); } @@ -3731,8 +4222,7 @@ static int unixOpenSharedMemory(unixFile *pDbFd){ /* Call fstat() to figure out the permissions on the database file. If ** a new *-shm file is created, an attempt will be made to create it - ** with the same permissions. The actual permissions the file is created - ** with are subject to the current umask setting. + ** with the same permissions. */ if( osFstat(pDbFd->h, &sStat) && pInode->bProcessLock==0 ){ rc = SQLITE_IOERR_FSTAT; @@ -3740,16 +4230,16 @@ static int unixOpenSharedMemory(unixFile *pDbFd){ } #ifdef SQLITE_SHM_DIRECTORY - nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 30; + nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31; #else - nShmFilename = 5 + (int)strlen(pDbFd->zPath); + nShmFilename = 6 + (int)strlen(pDbFd->zPath); #endif pShmNode = sqlite3_malloc( sizeof(*pShmNode) + nShmFilename ); if( pShmNode==0 ){ rc = SQLITE_NOMEM; goto shm_open_err; } - memset(pShmNode, 0, sizeof(*pShmNode)); + memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename); zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1]; #ifdef SQLITE_SHM_DIRECTORY sqlite3_snprintf(nShmFilename, zShmFilename, @@ -3757,6 +4247,7 @@ static int unixOpenSharedMemory(unixFile *pDbFd){ (u32)sStat.st_ino, (u32)sStat.st_dev); #else sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", pDbFd->zPath); + sqlite3FileSuffix3(pDbFd->zPath, zShmFilename); #endif pShmNode->h = -1; pDbFd->pInode->pShmNode = pShmNode; @@ -3768,12 +4259,22 @@ static int unixOpenSharedMemory(unixFile *pDbFd){ } if( pInode->bProcessLock==0 ){ - pShmNode->h = robust_open(zShmFilename, O_RDWR|O_CREAT, - (sStat.st_mode & 0777)); + int openFlags = O_RDWR | O_CREAT; + if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){ + openFlags = O_RDONLY; + pShmNode->isReadonly = 1; + } + pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777)); if( pShmNode->h<0 ){ rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename); goto shm_open_err; } + + /* If this process is running as root, make sure that the SHM file + ** is owned by the same user that owns the original database. Otherwise, + ** the original owner will not be able to connect. + */ + osFchown(pShmNode->h, sStat.st_uid, sStat.st_gid); /* Check to see if another process is holding the dead-man switch. ** If not, truncate the file to zero length. @@ -3886,16 +4387,32 @@ static int unixShmMap( if( sStat.st_size<nByte ){ /* The requested memory region does not exist. If bExtend is set to ** false, exit early. *pp will be set to NULL and SQLITE_OK returned. - ** - ** Alternatively, if bExtend is true, use ftruncate() to allocate - ** the requested memory region. */ - if( !bExtend ) goto shmpage_out; - if( robust_ftruncate(pShmNode->h, nByte) ){ - rc = unixLogError(SQLITE_IOERR_SHMSIZE, "ftruncate", - pShmNode->zFilename); + if( !bExtend ){ goto shmpage_out; } + + /* Alternatively, if bExtend is true, extend the file. Do this by + ** writing a single byte to the end of each (OS) page being + ** allocated or extended. Technically, we need only write to the + ** last page in order to extend the file. But writing to all new + ** pages forces the OS to allocate them immediately, which reduces + ** the chances of SIGBUS while accessing the mapped region later on. + */ + else{ + static const int pgsz = 4096; + int iPg; + + /* Write to the last byte of each newly allocated or extended page */ + assert( (nByte % pgsz)==0 ); + for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){ + if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, 0)!=1 ){ + const char *zFile = pShmNode->zFilename; + rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile); + goto shmpage_out; + } + } + } } } @@ -3911,11 +4428,12 @@ static int unixShmMap( while(pShmNode->nRegion<=iRegion){ void *pMem; if( pShmNode->h>=0 ){ - pMem = mmap(0, szRegion, PROT_READ|PROT_WRITE, - MAP_SHARED, pShmNode->h, pShmNode->nRegion*szRegion + pMem = osMmap(0, szRegion, + pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, + MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion ); if( pMem==MAP_FAILED ){ - rc = SQLITE_IOERR; + rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename); goto shmpage_out; } }else{ @@ -3937,6 +4455,7 @@ shmpage_out: }else{ *pp = 0; } + if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; sqlite3_mutex_leave(pShmNode->mutex); return rc; } @@ -4110,7 +4629,7 @@ static int unixShmUnmap( assert( pShmNode->nRef>0 ); pShmNode->nRef--; if( pShmNode->nRef==0 ){ - if( deleteFlag && pShmNode->h>=0 ) unlink(pShmNode->zFilename); + if( deleteFlag && pShmNode->h>=0 ) osUnlink(pShmNode->zFilename); unixShmPurge(pDbFd); } unixLeaveMutex(); @@ -4126,6 +4645,236 @@ static int unixShmUnmap( # define unixShmUnmap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** If it is currently memory mapped, unmap file pFd. +*/ +static void unixUnmapfile(unixFile *pFd){ + assert( pFd->nFetchOut==0 ); + if( pFd->pMapRegion ){ + osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); + pFd->pMapRegion = 0; + pFd->mmapSize = 0; + pFd->mmapSizeActual = 0; + } +} + +/* +** Return the system page size. +*/ +static int unixGetPagesize(void){ +#if HAVE_MREMAP + return 512; +#elif defined(_BSD_SOURCE) + return getpagesize(); +#else + return (int)sysconf(_SC_PAGESIZE); +#endif +} + +/* +** Attempt to set the size of the memory mapping maintained by file +** descriptor pFd to nNew bytes. Any existing mapping is discarded. +** +** If successful, this function sets the following variables: +** +** unixFile.pMapRegion +** unixFile.mmapSize +** unixFile.mmapSizeActual +** +** If unsuccessful, an error message is logged via sqlite3_log() and +** the three variables above are zeroed. In this case SQLite should +** continue accessing the database using the xRead() and xWrite() +** methods. +*/ +static void unixRemapfile( + unixFile *pFd, /* File descriptor object */ + i64 nNew /* Required mapping size */ +){ + const char *zErr = "mmap"; + int h = pFd->h; /* File descriptor open on db file */ + u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */ + i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ + u8 *pNew = 0; /* Location of new mapping */ + int flags = PROT_READ; /* Flags to pass to mmap() */ + + assert( pFd->nFetchOut==0 ); + assert( nNew>pFd->mmapSize ); + assert( nNew<=pFd->mmapSizeMax ); + assert( nNew>0 ); + assert( pFd->mmapSizeActual>=pFd->mmapSize ); + assert( MAP_FAILED!=0 ); + + if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; + + if( pOrig ){ + const int szSyspage = unixGetPagesize(); + i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); + u8 *pReq = &pOrig[nReuse]; + + /* Unmap any pages of the existing mapping that cannot be reused. */ + if( nReuse!=nOrig ){ + osMunmap(pReq, nOrig-nReuse); + } + +#if HAVE_MREMAP + pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); + zErr = "mremap"; +#else + pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse); + if( pNew!=MAP_FAILED ){ + if( pNew!=pReq ){ + osMunmap(pNew, nNew - nReuse); + pNew = 0; + }else{ + pNew = pOrig; + } + } +#endif + + /* The attempt to extend the existing mapping failed. Free it. */ + if( pNew==MAP_FAILED || pNew==0 ){ + osMunmap(pOrig, nReuse); + } + } + + /* If pNew is still NULL, try to create an entirely new mapping. */ + if( pNew==0 ){ + pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); + } + + if( pNew==MAP_FAILED ){ + pNew = 0; + nNew = 0; + unixLogError(SQLITE_OK, zErr, pFd->zPath); + + /* If the mmap() above failed, assume that all subsequent mmap() calls + ** will probably fail too. Fall back to using xRead/xWrite exclusively + ** in this case. */ + pFd->mmapSizeMax = 0; + } + pFd->pMapRegion = (void *)pNew; + pFd->mmapSize = pFd->mmapSizeActual = nNew; +} + +/* +** Memory map or remap the file opened by file-descriptor pFd (if the file +** is already mapped, the existing mapping is replaced by the new). Or, if +** there already exists a mapping for this file, and there are still +** outstanding xFetch() references to it, this function is a no-op. +** +** If parameter nByte is non-negative, then it is the requested size of +** the mapping to create. Otherwise, if nByte is less than zero, then the +** requested size is the size of the file on disk. The actual size of the +** created mapping is either the requested size or the value configured +** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. +** +** SQLITE_OK is returned if no error occurs (even if the mapping is not +** recreated as a result of outstanding references) or an SQLite error +** code otherwise. +*/ +static int unixMapfile(unixFile *pFd, i64 nByte){ + i64 nMap = nByte; + int rc; + + assert( nMap>=0 || pFd->nFetchOut==0 ); + if( pFd->nFetchOut>0 ) return SQLITE_OK; + + if( nMap<0 ){ + struct stat statbuf; /* Low-level file information */ + rc = osFstat(pFd->h, &statbuf); + if( rc!=SQLITE_OK ){ + return SQLITE_IOERR_FSTAT; + } + nMap = statbuf.st_size; + } + if( nMap>pFd->mmapSizeMax ){ + nMap = pFd->mmapSizeMax; + } + + if( nMap!=pFd->mmapSize ){ + if( nMap>0 ){ + unixRemapfile(pFd, nMap); + }else{ + unixUnmapfile(pFd); + } + } + + return SQLITE_OK; +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +/* +** If possible, return a pointer to a mapping of file fd starting at offset +** iOff. The mapping must be valid for at least nAmt bytes. +** +** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. +** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. +** Finally, if an error does occur, return an SQLite error code. The final +** value of *pp is undefined in this case. +** +** If this function does return a pointer, the caller must eventually +** release the reference by calling unixUnfetch(). +*/ +static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ +#if SQLITE_MAX_MMAP_SIZE>0 + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ +#endif + *pp = 0; + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFd->mmapSizeMax>0 ){ + if( pFd->pMapRegion==0 ){ + int rc = unixMapfile(pFd, -1); + if( rc!=SQLITE_OK ) return rc; + } + if( pFd->mmapSize >= iOff+nAmt ){ + *pp = &((u8 *)pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } +#endif + return SQLITE_OK; +} + +/* +** If the third argument is non-NULL, then this function releases a +** reference obtained by an earlier call to unixFetch(). The second +** argument passed to this function must be the same as the corresponding +** argument that was passed to the unixFetch() invocation. +** +** Or, if the third argument is NULL, then this function is being called +** to inform the VFS layer that, according to POSIX, any existing mapping +** may now be invalid and should be unmapped. +*/ +static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ +#if SQLITE_MAX_MMAP_SIZE>0 + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ + UNUSED_PARAMETER(iOff); + + /* If p==0 (unmap the entire file) then there must be no outstanding + ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + ** then there must be at least one outstanding. */ + assert( (p==0)==(pFd->nFetchOut==0) ); + + /* If p!=0, it must match the iOff value. */ + assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); + + if( p ){ + pFd->nFetchOut--; + }else{ + unixUnmapfile(pFd); + } + + assert( pFd->nFetchOut>=0 ); +#else + UNUSED_PARAMETER(fd); + UNUSED_PARAMETER(p); + UNUSED_PARAMETER(iOff); +#endif + return SQLITE_OK; +} + /* ** Here ends the implementation of all sqlite3_file methods. ** @@ -4184,7 +4933,9 @@ static const sqlite3_io_methods METHOD = { \ unixShmMap, /* xShmMap */ \ unixShmLock, /* xShmLock */ \ unixShmBarrier, /* xShmBarrier */ \ - unixShmUnmap /* xShmUnmap */ \ + unixShmUnmap, /* xShmUnmap */ \ + unixFetch, /* xFetch */ \ + unixUnfetch, /* xUnfetch */ \ }; \ static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ @@ -4201,7 +4952,7 @@ static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ IOMETHODS( posixIoFinder, /* Finder function name */ posixIoMethods, /* sqlite3_io_methods object name */ - 2, /* shared memory is enabled */ + 3, /* shared memory and mmap are enabled */ unixClose, /* xClose method */ unixLock, /* xLock method */ unixUnlock, /* xUnlock method */ @@ -4423,12 +5174,9 @@ typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); static int fillInUnixFile( sqlite3_vfs *pVfs, /* Pointer to vfs object */ int h, /* Open file descriptor of file being opened */ - int dirfd, /* Directory file descriptor */ sqlite3_file *pId, /* Write to the unixFile structure here */ const char *zFilename, /* Name of the file being opened */ - int noLock, /* Omit locking if true */ - int isDelete, /* Delete on close if true */ - int isReadOnly /* True if the file is opened read-only */ + int ctrlFlags /* Zero or more UNIXFILE_* values */ ){ const sqlite3_io_methods *pLockingStyle; unixFile *pNew = (unixFile *)pId; @@ -4436,11 +5184,6 @@ static int fillInUnixFile( assert( pNew->pInode==NULL ); - /* Parameter isDelete is only used on vxworks. Express this explicitly - ** here to prevent compiler warnings about unused parameters. - */ - UNUSED_PARAMETER(isDelete); - /* Usually the path zFilename should not be a relative pathname. The ** exception is when opening the proxy "conch" file in builds that ** include the special Apple locking styles. @@ -4452,28 +5195,34 @@ static int fillInUnixFile( assert( zFilename==0 || zFilename[0]=='/' ); #endif + /* No locking occurs in temporary files */ + assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 ); + OSTRACE(("OPEN %-3d %s\n", h, zFilename)); pNew->h = h; - pNew->dirfd = dirfd; + pNew->pVfs = pVfs; pNew->zPath = zFilename; - if( memcmp(pVfs->zName,"unix-excl",10)==0 ){ - pNew->ctrlFlags = UNIXFILE_EXCL; - }else{ - pNew->ctrlFlags = 0; + pNew->ctrlFlags = (u8)ctrlFlags; +#if SQLITE_MAX_MMAP_SIZE>0 + pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; +#endif + if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0), + "psow", SQLITE_POWERSAFE_OVERWRITE) ){ + pNew->ctrlFlags |= UNIXFILE_PSOW; } - if( isReadOnly ){ - pNew->ctrlFlags |= UNIXFILE_RDONLY; + if( strcmp(pVfs->zName,"unix-excl")==0 ){ + pNew->ctrlFlags |= UNIXFILE_EXCL; } #if OS_VXWORKS pNew->pId = vxworksFindFileId(zFilename); if( pNew->pId==0 ){ - noLock = 1; + ctrlFlags |= UNIXFILE_NOLOCK; rc = SQLITE_NOMEM; } #endif - if( noLock ){ + if( ctrlFlags & UNIXFILE_NOLOCK ){ pLockingStyle = &nolockIoMethods; }else{ pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); @@ -4493,7 +5242,7 @@ static int fillInUnixFile( unixEnterMutex(); rc = findInodeInfo(pNew, &pNew->pInode); if( rc!=SQLITE_OK ){ - /* If an error occured in findInodeInfo(), close the file descriptor + /* If an error occurred in findInodeInfo(), close the file descriptor ** immediately, before releasing the mutex. findInodeInfo() may fail ** in two scenarios: ** @@ -4551,6 +5300,7 @@ static int fillInUnixFile( */ char *zLockFile; int nFilename; + assert( zFilename!=0 ); nFilename = (int)strlen(zFilename) + 6; zLockFile = (char *)sqlite3_malloc(nFilename); if( zLockFile==0 ){ @@ -4590,53 +5340,21 @@ static int fillInUnixFile( if( rc!=SQLITE_OK ){ if( h>=0 ) robust_close(pNew, h, __LINE__); h = -1; - unlink(zFilename); - isDelete = 0; + osUnlink(zFilename); + pNew->ctrlFlags |= UNIXFILE_DELETE; } - pNew->isDelete = isDelete; #endif if( rc!=SQLITE_OK ){ - if( dirfd>=0 ) robust_close(pNew, dirfd, __LINE__); if( h>=0 ) robust_close(pNew, h, __LINE__); }else{ pNew->pMethod = pLockingStyle; OpenCounter(+1); + verifyDbFile(pNew); } return rc; } /* -** Open a file descriptor to the directory containing file zFilename. -** If successful, *pFd is set to the opened file descriptor and -** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM -** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined -** value. -** -** If SQLITE_OK is returned, the caller is responsible for closing -** the file descriptor *pFd using close(). -*/ -static int openDirectory(const char *zFilename, int *pFd){ - int ii; - int fd = -1; - char zDirname[MAX_PATHNAME+1]; - - sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); - for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--); - if( ii>0 ){ - zDirname[ii] = '\0'; - fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); - if( fd>=0 ){ -#ifdef FD_CLOEXEC - osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); -#endif - OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); - } - } - *pFd = fd; - return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname)); -} - -/* ** Return the name of a directory in which to put temporary files. ** If no suitable temporary file directory can be found, return NULL. */ @@ -4644,6 +5362,7 @@ static const char *unixTempFileDir(void){ static const char *azDirs[] = { 0, 0, + 0, "/var/tmp", "/usr/tmp", "/tmp", @@ -4654,7 +5373,8 @@ static const char *unixTempFileDir(void){ const char *zDir = 0; azDirs[0] = sqlite3_temp_directory; - if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR"); + if( !azDirs[1] ) azDirs[1] = getenv("SQLITE_TMPDIR"); + if( !azDirs[2] ) azDirs[2] = getenv("TMPDIR"); for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); zDir=azDirs[i++]){ if( zDir==0 ) continue; if( osStat(zDir, &buf) ) continue; @@ -4690,18 +5410,19 @@ static int unixGetTempname(int nBuf, char *zBuf){ /* Check that the output buffer is large enough for the temporary file ** name. If it is not, return SQLITE_ERROR. */ - if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 17) >= (size_t)nBuf ){ + if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 18) >= (size_t)nBuf ){ return SQLITE_ERROR; } do{ - sqlite3_snprintf(nBuf-17, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir); + sqlite3_snprintf(nBuf-18, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir); j = (int)strlen(zBuf); sqlite3_randomness(15, &zBuf[j]); for(i=0; i<15; i++, j++){ zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ]; } zBuf[j] = 0; + zBuf[j+1] = 0; }while( osAccess(zBuf,0)==0 ); return SQLITE_OK; } @@ -4750,7 +5471,7 @@ static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ ** ** Even if a subsequent open() call does succeed, the consequences of ** not searching for a resusable file descriptor are not dire. */ - if( 0==stat(zPath, &sStat) ){ + if( 0==osStat(zPath, &sStat) ){ unixInodeInfo *pInode; unixEnterMutex(); @@ -4780,23 +5501,31 @@ static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ ** written to *pMode. If an IO error occurs, an SQLite error code is ** returned and the value of *pMode is not modified. ** -** If the file being opened is a temporary file, it is always created with -** the octal permissions 0600 (read/writable by owner only). If the file -** is a database or master journal file, it is created with the permissions -** mask SQLITE_DEFAULT_FILE_PERMISSIONS. -** -** Finally, if the file being opened is a WAL or regular journal file, then +** In most cases cases, this routine sets *pMode to 0, which will become +** an indication to robust_open() to create the file using +** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. +** But if the file being opened is a WAL or regular journal file, then ** this function queries the file-system for the permissions on the ** corresponding database file and sets *pMode to this value. Whenever ** possible, WAL and journal files are created using the same permissions ** as the associated database file. +** +** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the +** original filename is unavailable. But 8_3_NAMES is only used for +** FAT filesystems and permissions do not matter there, so just use +** the default permissions. */ static int findCreateFileMode( const char *zPath, /* Path of file (possibly) being created */ int flags, /* Flags passed as 4th argument to xOpen() */ - mode_t *pMode /* OUT: Permissions to open file with */ + mode_t *pMode, /* OUT: Permissions to open file with */ + uid_t *pUid, /* OUT: uid to set on the file */ + gid_t *pGid /* OUT: gid to set on the file */ ){ int rc = SQLITE_OK; /* Return Code */ + *pMode = 0; + *pUid = 0; + *pGid = 0; if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ char zDb[MAX_PATHNAME+1]; /* Database file path */ int nDb; /* Number of valid bytes in zDb */ @@ -4808,27 +5537,35 @@ static int findCreateFileMode( ** ** "<path to db>-journal" ** "<path to db>-wal" - ** "<path to db>-journal-NNNN" - ** "<path to db>-wal-NNNN" + ** "<path to db>-journalNN" + ** "<path to db>-walNN" ** - ** where NNNN is a 4 digit decimal number. The NNNN naming schemes are + ** where NN is a decimal number. The NN naming schemes are ** used by the test_multiplex.c module. */ nDb = sqlite3Strlen30(zPath) - 1; - while( nDb>0 && zPath[nDb]!='l' ) nDb--; - nDb -= ((flags & SQLITE_OPEN_WAL) ? 3 : 7); +#ifdef SQLITE_ENABLE_8_3_NAMES + while( nDb>0 && sqlite3Isalnum(zPath[nDb]) ) nDb--; + if( nDb==0 || zPath[nDb]!='-' ) return SQLITE_OK; +#else + while( zPath[nDb]!='-' ){ + assert( nDb>0 ); + assert( zPath[nDb]!='\n' ); + nDb--; + } +#endif memcpy(zDb, zPath, nDb); zDb[nDb] = '\0'; - if( 0==stat(zDb, &sStat) ){ + if( 0==osStat(zDb, &sStat) ){ *pMode = sStat.st_mode & 0777; + *pUid = sStat.st_uid; + *pGid = sStat.st_gid; }else{ rc = SQLITE_IOERR_FSTAT; } }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){ *pMode = 0600; - }else{ - *pMode = SQLITE_DEFAULT_FILE_PERMISSIONS; } return rc; } @@ -4864,11 +5601,11 @@ static int unixOpen( ){ unixFile *p = (unixFile *)pFile; int fd = -1; /* File descriptor returned by open() */ - int dirfd = -1; /* Directory file descriptor */ int openFlags = 0; /* Flags to pass to open() */ int eType = flags&0xFFFFFF00; /* Type of file to open */ int noLock; /* True to omit locking primitives */ int rc = SQLITE_OK; /* Function Return Code */ + int ctrlFlags = 0; /* UNIXFILE_* flags */ int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); @@ -4878,12 +5615,15 @@ static int unixOpen( #if SQLITE_ENABLE_LOCKING_STYLE int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); #endif +#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE + struct statfs fsInfo; +#endif /* If creating a master or main-file journal, this function will open ** a file-descriptor on the directory too. The first time unixSync() ** is called the directory file descriptor will be fsync()ed and close()d. */ - int isOpenDirectory = (isCreate && ( + int syncDir = (isCreate && ( eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_WAL @@ -4892,7 +5632,7 @@ static int unixOpen( /* If argument zPath is a NULL pointer, this function is required to open ** a temporary file. Use this buffer to store the file name in. */ - char zTmpname[MAX_PATHNAME+1]; + char zTmpname[MAX_PATHNAME+2]; const char *zName = zPath; /* Check the following statements are true: @@ -4921,6 +5661,16 @@ static int unixOpen( || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL ); + /* Detect a pid change and reset the PRNG. There is a race condition + ** here such that two or more threads all trying to open databases at + ** the same instant might all reset the PRNG. But multiple resets + ** are harmless. + */ + if( randomnessPid!=getpid() ){ + randomnessPid = getpid(); + sqlite3_randomness(0,0); + } + memset(p, 0, sizeof(unixFile)); if( eType==SQLITE_OPEN_MAIN_DB ){ @@ -4935,14 +5685,24 @@ static int unixOpen( } } p->pUnused = pUnused; + + /* Database filenames are double-zero terminated if they are not + ** URIs with parameters. Hence, they can always be passed into + ** sqlite3_uri_parameter(). */ + assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 ); + }else if( !zName ){ /* If zName is NULL, the upper layer is requesting a temp file. */ - assert(isDelete && !isOpenDirectory); - rc = unixGetTempname(MAX_PATHNAME+1, zTmpname); + assert(isDelete && !syncDir); + rc = unixGetTempname(MAX_PATHNAME+2, zTmpname); if( rc!=SQLITE_OK ){ return rc; } zName = zTmpname; + + /* Generated temporary filenames are always double-zero terminated + ** for use by sqlite3_uri_parameter(). */ + assert( zName[strlen(zName)+1]==0 ); } /* Determine the value of the flags parameter passed to POSIX function @@ -4957,7 +5717,9 @@ static int unixOpen( if( fd<0 ){ mode_t openMode; /* Permissions to create file with */ - rc = findCreateFileMode(zName, flags, &openMode); + uid_t uid; /* Userid for the file */ + gid_t gid; /* Groupid for the file */ + rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); if( rc!=SQLITE_OK ){ assert( !p->pUnused ); assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); @@ -4978,6 +5740,14 @@ static int unixOpen( rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); goto open_finished; } + + /* If this process is running as root and if creating a new rollback + ** journal or WAL file, set the ownership of the journal or WAL to be + ** the same as the original database. + */ + if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ + osFchown(fd, uid, gid); + } } assert( fd>=0 ); if( pOutFlags ){ @@ -4993,7 +5763,7 @@ static int unixOpen( #if OS_VXWORKS zPath = zName; #else - unlink(zName); + osUnlink(zName); #endif } #if SQLITE_ENABLE_LOCKING_STYLE @@ -5002,31 +5772,12 @@ static int unixOpen( } #endif - if( isOpenDirectory ){ - rc = openDirectory(zPath, &dirfd); - if( rc!=SQLITE_OK ){ - /* It is safe to close fd at this point, because it is guaranteed not - ** to be open on a database file. If it were open on a database file, - ** it would not be safe to close as this would release any locks held - ** on the file by this process. */ - assert( eType!=SQLITE_OPEN_MAIN_DB ); - robust_close(p, fd, __LINE__); - goto open_finished; - } - } - -#ifdef FD_CLOEXEC - osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); -#endif - noLock = eType!=SQLITE_OPEN_MAIN_DB; #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE - struct statfs fsInfo; if( fstatfs(fd, &fsInfo) == -1 ){ ((unixFile*)pFile)->lastErrno = errno; - if( dirfd>=0 ) robust_close(p, dirfd, __LINE__); robust_close(p, fd, __LINE__); return SQLITE_IOERR_ACCESS; } @@ -5034,7 +5785,14 @@ static int unixOpen( ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; } #endif - + + /* Set up appropriate ctrlFlags */ + if( isDelete ) ctrlFlags |= UNIXFILE_DELETE; + if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY; + if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK; + if( syncDir ) ctrlFlags |= UNIXFILE_DIRSYNC; + if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI; + #if SQLITE_ENABLE_LOCKING_STYLE #if SQLITE_PREFER_PROXY_LOCKING isAutoProxy = 1; @@ -5048,7 +5806,6 @@ static int unixOpen( if( envforce!=NULL ){ useProxy = atoi(envforce)>0; }else{ - struct statfs fsInfo; if( statfs(zPath, &fsInfo) == -1 ){ /* In theory, the close(fd) call is sub-optimal. If the file opened ** with fd is a database file, and there are other connections open @@ -5058,9 +5815,6 @@ static int unixOpen( ** not while other file descriptors opened by the same process on ** the same file are working. */ p->lastErrno = errno; - if( dirfd>=0 ){ - robust_close(p, dirfd, __LINE__); - } robust_close(p, fd, __LINE__); rc = SQLITE_IOERR_ACCESS; goto open_finished; @@ -5068,8 +5822,7 @@ static int unixOpen( useProxy = !(fsInfo.f_flags&MNT_LOCAL); } if( useProxy ){ - rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, - isDelete, isReadonly); + rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); if( rc==SQLITE_OK ){ rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); if( rc!=SQLITE_OK ){ @@ -5086,8 +5839,8 @@ static int unixOpen( } #endif - rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, - isDelete, isReadonly); + rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); + open_finished: if( rc!=SQLITE_OK ){ sqlite3_free(p->pUnused); @@ -5108,13 +5861,18 @@ static int unixDelete( int rc = SQLITE_OK; UNUSED_PARAMETER(NotUsed); SimulateIOError(return SQLITE_IOERR_DELETE); - if( unlink(zPath)==(-1) && errno!=ENOENT ){ - return unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); + if( osUnlink(zPath)==(-1) ){ + if( errno==ENOENT ){ + rc = SQLITE_IOERR_DELETE_NOENT; + }else{ + rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); + } + return rc; } #ifndef SQLITE_DISABLE_DIRSYNC - if( dirSync ){ + if( (dirSync & 1)!=0 ){ int fd; - rc = openDirectory(zPath, &fd); + rc = osOpenDirectory(zPath, &fd); if( rc==SQLITE_OK ){ #if OS_VXWORKS if( fsync(fd)==-1 ) @@ -5125,6 +5883,8 @@ static int unixDelete( rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); } robust_close(0, fd, __LINE__); + }else if( rc==SQLITE_CANTOPEN ){ + rc = SQLITE_OK; } } #endif @@ -5132,7 +5892,7 @@ static int unixDelete( } /* -** Test the existance of or access permissions of file zPath. The +** Test the existence of or access permissions of file zPath. The ** test performed depends on the value of flags: ** ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists @@ -5167,7 +5927,7 @@ static int unixAccess( *pResOut = (osAccess(zPath, amode)==0); if( flags==SQLITE_ACCESS_EXISTS && *pResOut ){ struct stat buf; - if( 0==stat(zPath, &buf) && buf.st_size==0 ){ + if( 0==osStat(zPath, &buf) && buf.st_size==0 ){ *pResOut = 0; } } @@ -5298,20 +6058,20 @@ static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ ** tests repeatable. */ memset(zBuf, 0, nBuf); + randomnessPid = getpid(); #if !defined(SQLITE_TEST) { - int pid, fd; + int fd, got; fd = robust_open("/dev/urandom", O_RDONLY, 0); if( fd<0 ){ time_t t; time(&t); memcpy(zBuf, &t, sizeof(t)); - pid = getpid(); - memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid)); - assert( sizeof(t)+sizeof(pid)<=(size_t)nBuf ); - nBuf = sizeof(t) + sizeof(pid); + memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid)); + assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf ); + nBuf = sizeof(t) + sizeof(randomnessPid); }else{ - do{ nBuf = osRead(fd, zBuf, nBuf); }while( nBuf<0 && errno==EINTR ); + do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); robust_close(0, fd, __LINE__); } } @@ -5365,10 +6125,12 @@ int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ ** epoch of noon in Greenwich on November 24, 4714 B.C according to the ** proleptic Gregorian calendar. ** -** On success, return 0. Return 1 if the time and date cannot be found. +** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date +** cannot be found. */ static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; + int rc = SQLITE_OK; #if defined(NO_GETTOD) time_t t; time(&t); @@ -5379,8 +6141,11 @@ static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000; #else struct timeval sNow; - gettimeofday(&sNow, 0); - *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; + if( gettimeofday(&sNow, 0)==0 ){ + *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; + }else{ + rc = SQLITE_ERROR; + } #endif #ifdef SQLITE_TEST @@ -5389,7 +6154,7 @@ static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ } #endif UNUSED_PARAMETER(NotUsed); - return 0; + return rc; } /* @@ -5398,11 +6163,12 @@ static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ ** return 0. Return 1 if the time and date cannot be found. */ static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){ - sqlite3_int64 i; + sqlite3_int64 i = 0; + int rc; UNUSED_PARAMETER(NotUsed); - unixCurrentTimeInt64(0, &i); + rc = unixCurrentTimeInt64(0, &i); *prNow = i/86400000.0; - return 0; + return rc; } /* @@ -5448,7 +6214,7 @@ static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){ ** address in the shared range is taken for a SHARED lock, the entire ** shared range is taken for an EXCLUSIVE lock): ** -** PENDING_BYTE 0x40000000 +** PENDING_BYTE 0x40000000 ** RESERVED_BYTE 0x40000001 ** SHARED_RANGE 0x40000002 -> 0x40000200 ** @@ -5655,7 +6421,7 @@ static int proxyCreateLockPath(const char *lockPath){ if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') || (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){ buf[i]='\0'; - if( mkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ + if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ int err=errno; if( err!=EEXIST ) { OSTRACE(("CREATELOCKPATH FAILED creating %s, " @@ -5686,7 +6452,6 @@ static int proxyCreateUnixFile( int islockfile /* if non zero missing dirs will be created */ ) { int fd = -1; - int dirfd = -1; unixFile *pNew; int rc = SQLITE_OK; int openFlags = O_RDWR | O_CREAT; @@ -5710,17 +6475,17 @@ static int proxyCreateUnixFile( } } if( fd<0 ){ - fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS); + fd = robust_open(path, openFlags, 0); terrno = errno; if( fd<0 && errno==ENOENT && islockfile ){ if( proxyCreateLockPath(path) == SQLITE_OK ){ - fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS); + fd = robust_open(path, openFlags, 0); } } } if( fd<0 ){ openFlags = O_RDONLY; - fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS); + fd = robust_open(path, openFlags, 0); terrno = errno; } if( fd<0 ){ @@ -5751,7 +6516,7 @@ static int proxyCreateUnixFile( pUnused->flags = openFlags; pNew->pUnused = pUnused; - rc = fillInUnixFile(&dummyVfs, fd, dirfd, (sqlite3_file*)pNew, path, 0, 0, 0); + rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0); if( rc==SQLITE_OK ){ *ppFile = pNew; return SQLITE_OK; @@ -5791,6 +6556,8 @@ static int proxyGetHostID(unsigned char *pHostID, int *pError){ return SQLITE_IOERR; } } +#else + UNUSED_PARAMETER(pError); #endif #ifdef SQLITE_TEST /* simulate multiple hosts by creating unique hostid file paths */ @@ -5842,8 +6609,7 @@ static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ goto end_breaklock; } /* write it out to the temporary break file */ - fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), - SQLITE_DEFAULT_FILE_PERMISSIONS); + fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), 0); if( fd<0 ){ sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno); goto end_breaklock; @@ -5865,7 +6631,7 @@ static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ end_breaklock: if( rc ){ if( fd>=0 ){ - unlink(tPath); + osUnlink(tPath); robust_close(pFile, fd, __LINE__); } fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg); @@ -5883,6 +6649,7 @@ static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ int nTries = 0; struct timespec conchModTime; + memset(&conchModTime, 0, sizeof(conchModTime)); do { rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); nTries ++; @@ -6114,12 +6881,12 @@ static int proxyTakeConch(unixFile *pFile){ end_takeconch: OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h)); if( rc==SQLITE_OK && pFile->openFlags ){ + int fd; if( pFile->h>=0 ){ robust_close(pFile, pFile->h, __LINE__); } pFile->h = -1; - int fd = robust_open(pCtx->dbPath, pFile->openFlags, - SQLITE_DEFAULT_FILE_PERMISSIONS); + fd = robust_open(pCtx->dbPath, pFile->openFlags, 0); OSTRACE(("TRANSPROXY: OPEN %d\n", fd)); if( fd>=0 ){ pFile->h = fd; @@ -6688,7 +7455,7 @@ int sqlite3_os_init(void){ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==16 ); + assert( ArraySize(aSyscall)==24 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ |