Diffstat (limited to 'lib/device/bcache.c')
-rw-r--r--  lib/device/bcache.c  188
1 file changed, 135 insertions(+), 53 deletions(-)
diff --git a/lib/device/bcache.c b/lib/device/bcache.c
index 03cd4be87..c78445d99 100644
--- a/lib/device/bcache.c
+++ b/lib/device/bcache.c
@@ -33,6 +33,11 @@
#define SECTOR_SHIFT 9L
+#define FD_TABLE_INC 1024
+static int _fd_table_size;
+static int *_fd_table;
+
+
//----------------------------------------------------------------
static void log_sys_warn(const char *call)
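
The core of the change: callers stop passing raw file descriptors and instead hold a
small device index (di) that is resolved through the global _fd_table each time io is
issued. A standalone sketch of the indirection (illustrative only, not lvm2 code):

    #include <stdio.h>

    #define TABLE_SIZE 4

    static int fd_table[TABLE_SIZE] = { -1, -1, -1, -1 };  /* -1 marks a free slot */

    static int set_fd(int fd)   /* claim the first free slot, return its index */
    {
            int i;

            for (i = 0; i < TABLE_SIZE; i++) {
                    if (fd_table[i] == -1) {
                            fd_table[i] = fd;
                            return i;
                    }
            }
            return -1;
    }

    int main(void)
    {
            int di = set_fd(7);     /* device opened, kernel handed back fd 7 */

            fd_table[di] = 9;       /* device reopened with fd 9; di is unchanged */
            printf("di %d -> fd %d\n", di, fd_table[di]);
            return 0;
    }

The extra hop is what lets a device be closed and reopened (receiving a different fd)
while its cached blocks, keyed by di, stay valid.
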
@@ -155,11 +160,11 @@ static void _async_destroy(struct io_engine *ioe)
free(e);
}
-static int _last_byte_fd;
+static int _last_byte_di;
static uint64_t _last_byte_offset;
static int _last_byte_sector_size;
-static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
+static bool _async_issue(struct io_engine *ioe, enum dir d, int di,
sector_t sb, sector_t se, void *data, void *context)
{
int r;
@@ -183,7 +188,7 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
/*
* If bcache block goes past where lvm wants to write, then clamp it.
*/
- if ((d == DIR_WRITE) && _last_byte_offset && (fd == _last_byte_fd)) {
+ if ((d == DIR_WRITE) && _last_byte_offset && (di == _last_byte_di)) {
if (offset > _last_byte_offset) {
log_error("Limit write at %llu len %llu beyond last byte %llu",
(unsigned long long)offset,
@@ -268,7 +273,7 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
memset(&cb->cb, 0, sizeof(cb->cb));
- cb->cb.aio_fildes = (int) fd;
+ cb->cb.aio_fildes = (int) _fd_table[di];
cb->cb.u.c.buf = data;
cb->cb.u.c.offset = offset;
cb->cb.u.c.nbytes = nbytes;
@@ -276,13 +281,15 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
#if 0
if (d == DIR_READ) {
- log_debug("io R off %llu bytes %llu",
+ log_debug("io R off %llu bytes %llu di %d fd %d",
(unsigned long long)cb->cb.u.c.offset,
- (unsigned long long)cb->cb.u.c.nbytes);
+ (unsigned long long)cb->cb.u.c.nbytes,
+ di, _fd_table[di]);
} else {
- log_debug("io W off %llu bytes %llu",
+ log_debug("io W off %llu bytes %llu di %d fd %d",
(unsigned long long)cb->cb.u.c.offset,
- (unsigned long long)cb->cb.u.c.nbytes);
+ (unsigned long long)cb->cb.u.c.nbytes,
+ di, _fd_table[di]);
}
#endif
@@ -414,7 +421,7 @@ static void _sync_destroy(struct io_engine *ioe)
free(e);
}
-static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
+static bool _sync_issue(struct io_engine *ioe, enum dir d, int di,
sector_t sb, sector_t se, void *data, void *context)
{
int rv;
@@ -430,7 +437,7 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
}
where = sb * 512;
- off = lseek(fd, where, SEEK_SET);
+ off = lseek(_fd_table[di], where, SEEK_SET);
if (off == (off_t) -1) {
log_warn("Device seek error %d for offset %llu", errno, (unsigned long long)where);
free(io);
@@ -445,7 +452,7 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
/*
* If bcache block goes past where lvm wants to write, then clamp it.
*/
- if ((d == DIR_WRITE) && _last_byte_offset && (fd == _last_byte_fd)) {
+ if ((d == DIR_WRITE) && _last_byte_offset && (di == _last_byte_di)) {
uint64_t offset = where;
uint64_t nbytes = len;
sector_t limit_nbytes = 0;
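
Both the async and sync paths now compare di rather than fd against the recorded
last-byte state; the clamping itself is unchanged. Reduced to its core, the arithmetic
in the elided body looks roughly like this (a sketch using this file's names; the real
code also logs the adjustment):

    /* A DIR_WRITE that would run past _last_byte_offset is shortened,
     * then rounded back up to a whole sector so the device still sees
     * a sector-aligned transfer. */
    if (offset + nbytes > _last_byte_offset) {
            uint64_t limit_nbytes = _last_byte_offset - offset;
            uint64_t rem = limit_nbytes % _last_byte_sector_size;

            if (rem)
                    limit_nbytes += _last_byte_sector_size - rem;
            nbytes = limit_nbytes;
    }
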
@@ -526,9 +533,9 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
while (pos < len) {
if (d == DIR_READ)
- rv = read(fd, (char *)data + pos, len - pos);
+ rv = read(_fd_table[di], (char *)data + pos, len - pos);
else
- rv = write(fd, (char *)data + pos, len - pos);
+ rv = write(_fd_table[di], (char *)data + pos, len - pos);
if (rv == -1 && errno == EINTR)
continue;
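
The transfer loop above is the standard full-transfer idiom. Isolated for reference,
and with the result widened to ssize_t (what read(2) and write(2) actually return;
the file's int rv narrows it), a sketch:

    #include <errno.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <unistd.h>

    /* Read exactly len bytes from a seekable fd: retry on EINTR,
     * advance on short reads, fail on error or premature EOF. */
    static bool read_exact(int fd, char *data, size_t len)
    {
            size_t pos = 0;

            while (pos < len) {
                    ssize_t rv = read(fd, data + pos, len - pos);

                    if (rv == -1 && errno == EINTR)
                            continue;
                    if (rv <= 0)
                            return false;
                    pos += rv;
            }
            return true;
    }
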
@@ -688,7 +695,7 @@ struct bcache {
//----------------------------------------------------------------
struct key_parts {
- uint32_t fd;
+ uint32_t di;
uint64_t b;
} __attribute__ ((packed));
@@ -697,12 +704,12 @@ union key {
uint8_t bytes[12];
};
-static struct block *_block_lookup(struct bcache *cache, int fd, uint64_t i)
+static struct block *_block_lookup(struct bcache *cache, int di, uint64_t i)
{
union key k;
union radix_value v;
- k.parts.fd = fd;
+ k.parts.di = di;
k.parts.b = i;
if (radix_tree_lookup(cache->rtree, k.bytes, k.bytes + sizeof(k.bytes), &v))
@@ -716,7 +723,7 @@ static bool _block_insert(struct block *b)
union key k;
union radix_value v;
- k.parts.fd = b->fd;
+ k.parts.di = b->di;
k.parts.b = b->index;
v.ptr = b;
@@ -727,7 +734,7 @@ static void _block_remove(struct block *b)
{
union key k;
- k.parts.fd = b->fd;
+ k.parts.di = b->di;
k.parts.b = b->index;
radix_tree_remove(b->cache->rtree, k.bytes, k.bytes + sizeof(k.bytes));
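
The renamed key field is more than cosmetic: di and block index pack into a 12-byte
key whose leading 4 bytes identify the device, so every cached block of one device
shares a key prefix. The per-device iterate and remove_prefix calls later in this
patch depend on exactly that layout. A self-contained demonstration (on a
little-endian machine the output starts with the di bytes 03 00 00 00):

    #include <stdint.h>
    #include <stdio.h>

    struct key_parts {
            uint32_t di;
            uint64_t b;
    } __attribute__ ((packed));

    union key {
            struct key_parts parts;
            uint8_t bytes[12];
    };

    int main(void)
    {
            union key k = { .parts = { .di = 3, .b = 42 } };
            size_t i;

            /* bytes[0..3] identify the device; a prefix scan over just
             * those bytes visits every cached block belonging to di 3 */
            for (i = 0; i < sizeof(k.bytes); i++)
                    printf("%02x ", k.bytes[i]);
            printf("\n");
            return 0;
    }
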
@@ -869,7 +876,7 @@ static void _issue_low_level(struct block *b, enum dir d)
dm_list_move(&cache->io_pending, &b->list);
- if (!cache->engine->issue(cache->engine, d, b->fd, sb, se, b->data, b)) {
+ if (!cache->engine->issue(cache->engine, d, b->di, sb, se, b->data, b)) {
/* FIXME: if io_submit() set an errno, return that instead of EIO? */
_complete_io(b, -EIO);
return;
@@ -945,7 +952,7 @@ static struct block *_find_unused_clean_block(struct bcache *cache)
return NULL;
}
-static struct block *_new_block(struct bcache *cache, int fd, block_address i, bool can_wait)
+static struct block *_new_block(struct bcache *cache, int di, block_address i, bool can_wait)
{
struct block *b;
@@ -958,8 +965,8 @@ static struct block *_new_block(struct bcache *cache, int fd, block_address i, bool can_wait)
_writeback(cache, 16); // FIXME: magic number
_wait_io(cache);
} else {
- log_debug("bcache no new blocks for fd %d index %u",
- fd, (uint32_t) i);
+ log_debug("bcache no new blocks for di %d index %u",
+ di, (uint32_t) i);
return NULL;
}
}
@@ -968,7 +975,7 @@ static struct block *_new_block(struct bcache *cache, int fd, block_address i, bool can_wait)
if (b) {
dm_list_init(&b->list);
b->flags = 0;
- b->fd = fd;
+ b->di = di;
b->index = i;
b->ref_count = 0;
b->error = 0;
@@ -1014,10 +1021,10 @@ static void _miss(struct bcache *cache, unsigned flags)
}
static struct block *_lookup_or_read_block(struct bcache *cache,
- int fd, block_address i,
+ int di, block_address i,
unsigned flags)
{
- struct block *b = _block_lookup(cache, fd, i);
+ struct block *b = _block_lookup(cache, di, i);
if (b) {
// FIXME: this is insufficient. We need to also catch a read
@@ -1042,7 +1049,7 @@ static struct block *_lookup_or_read_block(struct bcache *cache,
} else {
_miss(cache, flags);
- b = _new_block(cache, fd, i, true);
+ b = _new_block(cache, di, i, true);
if (b) {
if (flags & GF_ZERO)
_zero_block(b);
@@ -1087,6 +1094,7 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks,
struct bcache *cache;
unsigned max_io = engine->max_io(engine);
long pgsize = sysconf(_SC_PAGESIZE);
+ int i;
if (pgsize < 0) {
log_warn("WARNING: _SC_PAGESIZE returns negative value.");
@@ -1147,6 +1155,18 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks,
return NULL;
}
+ _fd_table_size = FD_TABLE_INC;
+
+ if (!(_fd_table = malloc(sizeof(int) * _fd_table_size))) {
+ cache->engine->destroy(cache->engine);
+ radix_tree_destroy(cache->rtree);
+ free(cache);
+ return NULL;
+ }
+
+ for (i = 0; i < _fd_table_size; i++)
+ _fd_table[i] = -1;
+
return cache;
}
@@ -1162,6 +1182,9 @@ void bcache_destroy(struct bcache *cache)
radix_tree_destroy(cache->rtree);
cache->engine->destroy(cache->engine);
free(cache);
+ free(_fd_table);
+ _fd_table = NULL;
+ _fd_table_size = 0;
}
sector_t bcache_block_sectors(struct bcache *cache)
@@ -1179,13 +1202,13 @@ unsigned bcache_max_prefetches(struct bcache *cache)
return cache->max_io;
}
-void bcache_prefetch(struct bcache *cache, int fd, block_address i)
+void bcache_prefetch(struct bcache *cache, int di, block_address i)
{
- struct block *b = _block_lookup(cache, fd, i);
+ struct block *b = _block_lookup(cache, di, i);
if (!b) {
if (cache->nr_io_pending < cache->max_io) {
- b = _new_block(cache, fd, i, false);
+ b = _new_block(cache, di, i, false);
if (b) {
cache->prefetches++;
_issue_read(b);
@@ -1203,12 +1226,15 @@ static void _recycle_block(struct bcache *cache, struct block *b)
_free_block(b);
}
-bool bcache_get(struct bcache *cache, int fd, block_address i,
+bool bcache_get(struct bcache *cache, int di, block_address i,
unsigned flags, struct block **result)
{
struct block *b;
- b = _lookup_or_read_block(cache, fd, i, flags);
+ if (di >= _fd_table_size)
+ goto bad;
+
+ b = _lookup_or_read_block(cache, di, i, flags);
if (b) {
if (b->error) {
if (b->io_dir == DIR_READ) {
@@ -1227,10 +1253,10 @@ bool bcache_get(struct bcache *cache, int fd, block_address i,
*result = b;
return true;
}
-
+bad:
*result = NULL;
- log_error("bcache failed to get block %u fd %d", (uint32_t) i, fd);
+ log_error("bcache failed to get block %u di %d", (uint32_t) i, di);
return false;
}
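
One note on the new guard: it rejects an index past the end of the table but, as
written, not a negative one, so a caller that stores the -1 returned by a failed
bcache_set_fd() would get past it. A stricter variant might read:

    if (di < 0 || di >= _fd_table_size)
            goto bad;
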
@@ -1294,7 +1320,7 @@ static bool _invalidate_block(struct bcache *cache, struct block *b)
if (b->ref_count) {
log_warn("bcache_invalidate: block (%d, %llu) still held",
- b->fd, (unsigned long long) b->index);
+ b->di, (unsigned long long) b->index);
return false;
}
@@ -1311,9 +1337,9 @@ static bool _invalidate_block(struct bcache *cache, struct block *b)
return true;
}
-bool bcache_invalidate(struct bcache *cache, int fd, block_address i)
+bool bcache_invalidate(struct bcache *cache, int di, block_address i)
{
- return _invalidate_block(cache, _block_lookup(cache, fd, i));
+ return _invalidate_block(cache, _block_lookup(cache, di, i));
}
//----------------------------------------------------------------
@@ -1342,14 +1368,14 @@ static bool _invalidate_v(struct radix_tree_iterator *it,
if (b->error || _test_flags(b, BF_DIRTY)) {
log_warn("bcache_invalidate: block (%d, %llu) still dirty",
- b->fd, (unsigned long long) b->index);
+ b->di, (unsigned long long) b->index);
iit->success = false;
return true;
}
if (b->ref_count) {
log_warn("bcache_invalidate: block (%d, %llu) still held",
- b->fd, (unsigned long long) b->index);
+ b->di, (unsigned long long) b->index);
iit->success = false;
return true;
}
@@ -1362,24 +1388,24 @@ static bool _invalidate_v(struct radix_tree_iterator *it,
return true;
}
-bool bcache_invalidate_fd(struct bcache *cache, int fd)
+bool bcache_invalidate_di(struct bcache *cache, int di)
{
union key k;
struct invalidate_iterator it;
- k.parts.fd = fd;
+ k.parts.di = di;
it.it.visit = _writeback_v;
- radix_tree_iterate(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.fd), &it.it);
+ radix_tree_iterate(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.di), &it.it);
_wait_all(cache);
it.success = true;
it.it.visit = _invalidate_v;
- radix_tree_iterate(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.fd), &it.it);
+ radix_tree_iterate(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.di), &it.it);
if (it.success)
- radix_tree_remove_prefix(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.fd));
+ radix_tree_remove_prefix(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.di));
return it.success;
}
@@ -1393,7 +1419,7 @@ static bool _abort_v(struct radix_tree_iterator *it,
if (b->ref_count) {
log_fatal("bcache_abort: block (%d, %llu) still held",
- b->fd, (unsigned long long) b->index);
+ b->di, (unsigned long long) b->index);
return true;
}
@@ -1405,35 +1431,91 @@ static bool _abort_v(struct radix_tree_iterator *it,
return true;
}
-void bcache_abort_fd(struct bcache *cache, int fd)
+void bcache_abort_di(struct bcache *cache, int di)
{
union key k;
struct radix_tree_iterator it;
- k.parts.fd = fd;
+ k.parts.di = di;
it.visit = _abort_v;
- radix_tree_iterate(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.fd), &it);
- radix_tree_remove_prefix(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.fd));
+ radix_tree_iterate(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.di), &it);
+ radix_tree_remove_prefix(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.di));
}
//----------------------------------------------------------------
-void bcache_set_last_byte(struct bcache *cache, int fd, uint64_t offset, int sector_size)
+void bcache_set_last_byte(struct bcache *cache, int di, uint64_t offset, int sector_size)
{
- _last_byte_fd = fd;
+ _last_byte_di = di;
_last_byte_offset = offset;
_last_byte_sector_size = sector_size;
if (!sector_size)
_last_byte_sector_size = 512;
}
-void bcache_unset_last_byte(struct bcache *cache, int fd)
+void bcache_unset_last_byte(struct bcache *cache, int di)
{
- if (_last_byte_fd == fd) {
- _last_byte_fd = 0;
+ if (_last_byte_di == di) {
+ _last_byte_di = 0;
_last_byte_offset = 0;
_last_byte_sector_size = 0;
}
}
+int bcache_set_fd(int fd)
+{
+ int *new_table = NULL;
+ int new_size = 0;
+ int i;
+
+ retry:
+ for (i = 0; i < _fd_table_size; i++) {
+ if (_fd_table[i] == -1) {
+ _fd_table[i] = fd;
+ return i;
+ }
+ }
+
+ /* already tried once, shouldn't happen */
+ if (new_size)
+ return -1;
+
+ new_size = _fd_table_size + FD_TABLE_INC;
+
+ new_table = realloc(_fd_table, sizeof(int) * new_size);
+ if (!new_table) {
+ log_error("Cannot extend bcache fd table");
+ return -1;
+ }
+
+ for (i = _fd_table_size; i < new_size; i++)
+ new_table[i] = -1;
+
+ _fd_table = new_table;
+ _fd_table_size = new_size;
+
+ goto retry;
+}
+
+/*
+ * Should we check for unflushed or in-progress io on an fd
+ * prior to doing clear_fd or change_fd? (To catch mistakes;
+ * the caller should be smart enough to not do that.)
+ */
+
+void bcache_clear_fd(int di)
+{
+ if (di >= _fd_table_size)
+ return;
+ _fd_table[di] = -1;
+}
+
+int bcache_change_fd(int di, int fd)
+{
+ if (di >= _fd_table_size)
+ return -1;
+ _fd_table[di] = fd;
+ return 1;
+}
+
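
Putting the three new entry points together, the intended caller lifecycle looks
roughly like this (a hypothetical fragment; the device path, open flags, and omitted
error handling are invented for illustration):

    #include <fcntl.h>
    #include <unistd.h>
    #include "bcache.h"

    void example(struct bcache *cache)
    {
            int fd = open("/dev/sdb", O_RDONLY);
            int di = bcache_set_fd(fd);     /* claim a slot; check di >= 0 in real code */

            bcache_prefetch(cache, di, 0);  /* cache calls now take di, not fd */

            /* Reopen read-write: the fd changes; the index, and any
             * cached blocks keyed by it, do not. */
            close(fd);
            fd = open("/dev/sdb", O_RDWR);
            bcache_change_fd(di, fd);

            bcache_invalidate_di(cache, di);  /* flush and drop this device's blocks */
            bcache_clear_fd(di);              /* release the slot for reuse */
            close(fd);
    }

Note that bcache_change_fd() reports failure as -1 and success as 1, so callers
should test for a negative return.
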