summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/os_posix/os_map.c
blob: b33f6d82e3464e1da475a984bacb5e738fa21e5e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/*-
 * Copyright (c) 2014-2016 MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

#include "wt_internal.h"

/*
 * __wt_posix_map --
 *	Create a private, read-only memory mapping of a file.
 *
 *	The length mapped is whatever the handle's fh_size method reports. On
 *	success, the mapping's address is stored through mapped_regionp (which
 *	is treated as a void **) and its length through lenp. The
 *	mapped_cookiep argument is not used.
 *
 *	Returns 0 on success, ENOTSUP if the handle is configured for direct
 *	I/O, the fh_size error if the file size can't be read, or the mmap
 *	error (after logging a message naming the file).
 */
int
__wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session,
    void *mapped_regionp, size_t *lenp, void *mapped_cookiep)
{
	WT_FILE_HANDLE_POSIX *posix_fh;
	WT_SESSION_IMPL *session;
	size_t map_len;
	wt_off_t fsize;
	int map_flags;
	void *region;

	WT_UNUSED(mapped_cookiep);

	posix_fh = (WT_FILE_HANDLE_POSIX *)fh;
	session = (WT_SESSION_IMPL *)wt_session;

	/*
	 * Refuse to map files opened for direct I/O: the Linux open(2)
	 * documentation says applications should avoid mixing mmap(2) of
	 * files with direct I/O to the same files.
	 */
	if (posix_fh->direct_io)
		return (ENOTSUP);

	/*
	 * Nothing here locks the file against changing size or contents while
	 * it is mapped; keeping the mapped region consistent with other file
	 * activity is the caller's responsibility.
	 */
	WT_RET(fh->fh_size(fh, wt_session, &fsize));
	map_len = (size_t)fsize;

	(void)__wt_verbose(session, WT_VERB_HANDLEOPS,
	    "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, map_len);

	/* Add MAP_NOCORE to the flags on systems that define it. */
	map_flags = MAP_PRIVATE;
#ifdef MAP_NOCORE
	map_flags |= MAP_NOCORE;
#endif

	region = mmap(
	    NULL, map_len, PROT_READ, map_flags, posix_fh->fd, (wt_off_t)0);
	if (region == MAP_FAILED) {
		WT_RET_MSG(session,
		    __wt_errno(), "%s: memory-map: mmap", fh->name);
	}

	/* Hand the mapping and its length back to the caller. */
	*lenp = map_len;
	*(void **)mapped_regionp = region;
	return (0);
}

#ifdef HAVE_POSIX_MADVISE
/*
 * __wt_posix_map_preload --
 *	Cause a section of a memory map to be faulted in.
 *
 *	Advises the system, via posix_madvise(POSIX_MADV_WILLNEED), that the
 *	region starting at map and running for length bytes will be needed.
 *	The address is rounded down to a multiple of the connection's page
 *	size and the length adjusted to match before the call is made. The
 *	mapped_cookie argument is not used.
 *
 *	Returns 0 on success or when no advice is issued (see the early
 *	returns below); otherwise logs a message naming the file and returns
 *	the posix_madvise error.
 */
int
__wt_posix_map_preload(WT_FILE_HANDLE *fh,
    WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie)
{
	WT_BM *bm;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	void *blk;

	WT_UNUSED(mapped_cookie);

	session = (WT_SESSION_IMPL *)wt_session;

	conn = S2C(session);
	/*
	 * NOTE(review): S2BT(session)->bm is dereferenced unconditionally;
	 * presumably the caller guarantees the session references a btree
	 * with a block manager -- confirm against callers.
	 */
	bm = S2BT(session)->bm;

	/*
	 * Linux requires the address be aligned to a 4KB boundary.
	 *
	 * Round the address down using the connection's page size (the mask
	 * assumes page_size is a power of two -- TODO confirm), then grow the
	 * length by the number of bytes the address moved so the end of the
	 * region is unchanged.
	 */
	blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1));
	length += WT_PTRDIFF(map, blk);

	/* XXX proxy for "am I doing a scan?" -- manual read-ahead */
	if (F_ISSET(session, WT_SESSION_NO_CACHE)) {
		/*
		 * Read in 2MB blocks every 1MB of data.
		 *
		 * NOTE(review): the masked value on the left is always below
		 * 1MB, while the right side is the aligned address itself, so
		 * this test looks like it is true for any address at or above
		 * 1MB and skips the read-ahead -- confirm the intent.
		 */
		if (((uintptr_t)((uint8_t *)blk + length) &
		    (uintptr_t)((1<<20) - 1)) < (uintptr_t)blk)
			return (0);
		/*
		 * Extend the request to the larger of 20 times the length and
		 * 2MB (2 << 20), but never past the end of the block manager's
		 * mapped region (bm->map + bm->maplen).
		 */
		length = WT_MIN(WT_MAX(20 * length, 2 << 20),
		    WT_PTRDIFF((uint8_t *)bm->map + bm->maplen, blk));
	}

	/*
	 * Manual pages aren't clear on whether alignment is required for the
	 * size, so we will be conservative.
	 *
	 * The length is rounded down to a page-size multiple; if what remains
	 * is a single page or less, return without issuing any advice.
	 */
	length &= ~(size_t)(conn->page_size - 1);
	if (length <= (size_t)conn->page_size)
		return (0);

	WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_WILLNEED), ret);
	if (ret == 0)
		return (0);

	/* Log the failure with the file name and return the error. */
	WT_RET_MSG(session, ret,
	    "%s: memory-map preload: posix_madvise: POSIX_MADV_WILLNEED",
	    fh->name);
}
#endif

#ifdef HAVE_POSIX_MADVISE
/*
 * __wt_posix_map_discard --
 *	Advise the system that a chunk of the memory map is no longer needed.
 *
 *	The address is rounded down to a multiple of the connection's page
 *	size and the length grown by the same amount, then the region is
 *	passed to posix_madvise with POSIX_MADV_DONTNEED. The mapped_cookie
 *	argument is not used.
 *
 *	Returns 0 on success; otherwise logs a message naming the file and
 *	returns the posix_madvise error.
 */
int
__wt_posix_map_discard(WT_FILE_HANDLE *fh,
    WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	uintptr_t align_mask;
	void *aligned;

	WT_UNUSED(mapped_cookie);

	session = (WT_SESSION_IMPL *)wt_session;

	/*
	 * Linux requires the address be aligned to a 4KB boundary: clear the
	 * low-order bits using the connection's page size, and extend the
	 * length to cover the bytes between the aligned and original address.
	 */
	align_mask = ~(uintptr_t)(S2C(session)->page_size - 1);
	aligned = (void *)((uintptr_t)map & align_mask);
	length += WT_PTRDIFF(map, aligned);

	WT_SYSCALL(posix_madvise(aligned, length, POSIX_MADV_DONTNEED), ret);
	if (ret != 0) {
		WT_RET_MSG(session, ret,
		    "%s: memory-map discard: posix_madvise: POSIX_MADV_DONTNEED",
		    fh->name);
	}

	return (0);
}
#endif

/*
 * __wt_posix_unmap --
 *	Tear down a memory mapping.
 *
 *	Calls munmap on the len bytes starting at mapped_region. The
 *	mapped_cookie argument is not used. Returns 0 on success; otherwise
 *	logs a message naming the file and returns the munmap error.
 */
int
__wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session,
    void *mapped_region, size_t len, void *mapped_cookie)
{
	WT_SESSION_IMPL *session;

	WT_UNUSED(mapped_cookie);

	session = (WT_SESSION_IMPL *)wt_session;

	(void)__wt_verbose(session, WT_VERB_HANDLEOPS,
	    "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len);

	/* Report a failed munmap; anything else is success. */
	if (munmap(mapped_region, len) != 0) {
		WT_RET_MSG(session,
		    __wt_errno(), "%s: memory-unmap: munmap", fh->name);
	}

	return (0);
}