Block allocation map is now built in a separate thread, and does not delay
server startup (sparse write avoidance doesn't happen until it is finished).
Added mutex to bitset functions, which were already being called from
multiple threads.  Rewrote allocation map builder to request file
information in multiple chunks, to avoid uninterruptible wait and dynamic
memory allocation.
This commit is contained in:
Matthew Bloch
2012-10-07 21:55:01 +01:00
parent 7b13964c39
commit a49cf14927
6 changed files with 112 additions and 81 deletions

View File

@@ -78,9 +78,11 @@ static inline int bit_run_count(char* b, int from, int len) {
/** An application of a bitset - a bitset mapping represents a file of ''size''
* broken down into ''resolution''-sized chunks. The bit set is assumed to
* represent one bit per chunk.
* represent one bit per chunk. We also bundle a lock so that the set can be
* written reliably by multiple threads.
*/
struct bitset_mapping {
pthread_mutex_t lock;
uint64_t size;
int resolution;
char bits[];
@@ -100,6 +102,8 @@ static inline struct bitset_mapping* bitset_alloc(
);
bitset->size = size;
bitset->resolution = resolution;
/* don't actually need to call pthread_mutex_destroy */
pthread_mutex_init(&bitset->lock, NULL);
return bitset;
}
@@ -108,6 +112,13 @@ static inline struct bitset_mapping* bitset_alloc(
last = (from+len-1)/set->resolution, \
bitlen = last-first+1
#define BITSET_LOCK \
FATAL_IF_NEGATIVE(pthread_mutex_lock(&set->lock), "Error locking bitset")
#define BITSET_UNLOCK \
FATAL_IF_NEGATIVE(pthread_mutex_unlock(&set->lock), "Error unlocking bitset")
/** Set the bits in a bitset which correspond to the given bytes in the larger
* file.
*/
@@ -117,7 +128,9 @@ static inline void bitset_set_range(
uint64_t len)
{
INT_FIRST_AND_LAST;
BITSET_LOCK;
bit_set_range(set->bits, first, bitlen);
BITSET_UNLOCK;
}
@@ -140,7 +153,9 @@ static inline void bitset_clear_range(
uint64_t len)
{
INT_FIRST_AND_LAST;
BITSET_LOCK;
bit_clear_range(set->bits, first, bitlen);
BITSET_UNLOCK;
}
@@ -163,10 +178,14 @@ static inline int bitset_run_count(
{
/* now fix in case len goes past the end of the memory we have
* control of */
int run;
len = len+from>set->size ? set->size-from : len;
INT_FIRST_AND_LAST;
return (bit_run_count(set->bits, first, bitlen) * set->resolution) -
BITSET_LOCK;
run = (bit_run_count(set->bits, first, bitlen) * set->resolution) -
(from % set->resolution);
BITSET_UNLOCK;
return run;
}
/** Tests whether the bit field is clear for the given file offset.

View File

@@ -407,13 +407,11 @@ void client_reply_to_read( struct client* client, struct nbd_request request )
void client_reply_to_write( struct client* client, struct nbd_request request )
{
debug("request write %ld+%d", request.from, request.len);
if (client->serve->allocation_map) {
if (client->serve->allocation_map_built) {
write_not_zeroes( client, request.from, request.len );
}
else {
debug("No allocation map, writing directly.");
/* If we get cut off partway through reading this data
* */
ERROR_IF_NEGATIVE(
readloop( client->socket,
client->mapped + request.from,
@@ -423,6 +421,14 @@ void client_reply_to_write( struct client* client, struct nbd_request request )
request.len
);
server_dirty(client->serve, request.from, request.len);
/* the allocation_map is shared between client threads, and may be
* being built. But AFAICT this is safe, to accurately reflect the
* fact that we've caused block allocation to occur, though we will
* never consult the allocation_map until the builder thread has
* finished.
*/
bitset_set_range(client->serve->allocation_map,
request.from, request.len);
}
if (1) /* not sure whether this is necessary... */

View File

@@ -11,83 +11,51 @@
#include "util.h"
#include "bitset.h"
struct bitset_mapping* build_allocation_map(int fd, uint64_t size, int resolution)
int build_allocation_map(struct bitset_mapping* allocation_map, int fd)
{
unsigned int i;
struct bitset_mapping* allocation_map = bitset_alloc(size, resolution);
struct fiemap *fiemap_count = NULL, *fiemap = NULL;
/* break blocking ioctls down */
const unsigned long max_length = 100*1024*1024;
const unsigned int max_extents = 1000;
fiemap_count = (struct fiemap*) xmalloc(sizeof(struct fiemap));
unsigned long offset = 0;
fiemap_count->fm_start = 0;
fiemap_count->fm_length = size;
fiemap_count->fm_flags = FIEMAP_FLAG_SYNC;
fiemap_count->fm_extent_count = 0;
fiemap_count->fm_mapped_extents = 0;
struct {
struct fiemap fiemap;
struct fiemap_extent extents[max_extents];
} fiemap_static;
struct fiemap* fiemap = (struct fiemap*) &fiemap_static;
/* Find out how many extents there are */
if (ioctl(fd, FS_IOC_FIEMAP, fiemap_count) < 0) {
debug( "Couldn't get fiemap_count, returning no allocation_map" );
goto no_map;
}
memset(&fiemap_static, 0, sizeof(fiemap_static));
/* Resize fiemap to allow us to read in the extents */
fiemap = (struct fiemap*)xmalloc(
sizeof(struct fiemap) + (
sizeof(struct fiemap_extent) *
fiemap_count->fm_mapped_extents
)
for (offset = 0; offset < allocation_map->size; offset += fiemap->fm_length) {
unsigned int i;
fiemap->fm_start = offset;
fiemap->fm_length = max_length;
if (offset + max_length > allocation_map->size)
fiemap->fm_length = allocation_map->size-offset;
fiemap->fm_flags = FIEMAP_FLAG_SYNC;
fiemap->fm_extent_count = max_extents;
fiemap->fm_mapped_extents = 0;
if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) {
debug( "Couldn't get fiemap, returning no allocation_map" );
free(allocation_map);
return NULL;
}
for (i=0;i<fiemap->fm_mapped_extents;i++) {
bitset_set_range(
allocation_map,
fiemap->fm_extents[i].fe_logical,
fiemap->fm_extents[i].fe_length
);
/* realloc makes valgrind complain a lot */
memcpy(fiemap, fiemap_count, sizeof(struct fiemap));
free( fiemap_count );
fiemap->fm_extent_count = fiemap->fm_mapped_extents;
fiemap->fm_mapped_extents = 0;
if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) {
debug( "Couldn't get fiemap, returning no allocation_map" );
goto no_map;
}
for (i=0;i<fiemap->fm_mapped_extents;i++) {
bitset_set_range(
allocation_map,
fiemap->fm_extents[i].fe_logical,
fiemap->fm_extents[i].fe_length
);
}
/* This is pointlessly verbose for real discs, it's here as a
* reference for pulling data out of the allocation map */
if ( 0 ) {
for (i=0; i<(size/resolution); i++) {
debug("map[%d] = %d%d%d%d%d%d%d%d",
i,
(allocation_map->bits[i] & 1) == 1,
(allocation_map->bits[i] & 2) == 2,
(allocation_map->bits[i] & 4) == 4,
(allocation_map->bits[i] & 8) == 8,
(allocation_map->bits[i] & 16) == 16,
(allocation_map->bits[i] & 32) == 32,
(allocation_map->bits[i] & 64) == 64,
(allocation_map->bits[i] & 128) == 128
);
//debug("range from %ld + %ld", fiemap->fm_extents[i].fe_logical, fiemap->fm_extents[i].fe_length);
}
}
free(fiemap);
debug("Successfully built allocation map");
return allocation_map;
no_map:
free( allocation_map );
if ( NULL != fiemap ) { free( fiemap ); }
if ( NULL != fiemap_count ) { free( fiemap_count ); }
return NULL;
}

View File

@@ -4,13 +4,13 @@
#include "serve.h"
struct bitset_mapping; /* don't need whole of bitset.h here */
/** Returns a bit field representing which blocks are allocated in file
* descriptor ''fd''. You must supply the size, and the resolution at which
* you want the bits to represent allocated blocks. If the OS represents
* allocated blocks at a finer resolution than you've asked for, any block
* or part block will count as "allocated" with the corresponding bit set.
/** Scan the file opened in ''fd'', set bits in ''allocation_map'' that
* correspond to which blocks are physically allocated on disc (or part-
* allocated). If the OS represents allocated blocks at a finer resolution
* than you've asked for, any block or part block will count as "allocated"
* with the corresponding bit set. Returns 1 if successful, 0 otherwise.
*/
struct bitset_mapping* build_allocation_map(int fd, off64_t size, int resolution);
int build_allocation_map(struct bitset_mapping* allocation_map, int fd);
/** Repeat a write() operation that succeeds partially until ''size'' bytes
* are written, or an error is returned, when it returns -1 as usual.

View File

@@ -757,6 +757,25 @@ void serve_accept_loop(struct server* params)
while( server_accept( params ) );
}
void* build_allocation_map_thread(void* params1)
{
struct server* params = (struct server*) params1;
int fd = open(params->filename, O_RDONLY);
FATAL_IF_NEGATIVE(fd, "Couldn't open %s", params->filename);
NULLCHECK(params);
params->allocation_map = bitset_alloc(params->size,
block_allocation_resolution);
if (build_allocation_map(params->allocation_map, fd))
params->allocation_map_built = 1;
else
warn("Didn't build allocation map for %s", params->filename);
close(fd);
return NULL;
}
/** Initialisation function that sets up the initial allocation map, i.e. so
* we know which blocks of the file are allocated.
*/
@@ -772,9 +791,9 @@ void serve_init_allocation_map(struct server* params)
params->size = size;
FATAL_IF_NEGATIVE(size, "Couldn't find size of %s",
params->filename);
params->allocation_map =
build_allocation_map(fd, size, block_allocation_resolution);
close(fd);
FATAL_IF_NEGATIVE(pthread_create(&params->allocation_map_builder_thread,
NULL, build_allocation_map_thread, params),
"Couldn't create thread");
}
@@ -818,9 +837,13 @@ void serve_cleanup(struct server* params,
info("cleaning up");
int i;
void* status;
if (params->server_fd){ close(params->server_fd); }
/* need to stop background build if we're killed very early on */
pthread_cancel(params->allocation_map_builder_thread);
pthread_join(params->allocation_map_builder_thread, &status);
if (params->allocation_map) {
free(params->allocation_map);
}
@@ -839,7 +862,6 @@ void serve_cleanup(struct server* params,
for (i=0; i < params->max_nbd_clients; i++) {
void* status;
pthread_t thread_id = params->nbd_client[i].thread;
if (thread_id != 0) {

View File

@@ -3,6 +3,7 @@
#include <sys/types.h>
#include <unistd.h>
#include <signal.h> /* for sig_atomic_t */
#include "flexnbd.h"
#include "parse.h"
@@ -66,7 +67,22 @@ struct server {
int server_fd;
int control_fd;
/* the allocation_map keeps track of which blocks in the backing file
* have been allocated, or part-allocated on disc, with unallocated
* blocks presumed to contain zeroes (i.e. represented as sparse files
* by the filesystem). We can use this information when receiving
* incoming writes, and avoid writing zeroes to unallocated sections
* of the file which would needlessly increase disc usage. This
* bitmap will start at all-zeroes for an empty file, and tend towards
* all-ones as the file is written to (i.e. we assume that allocated
* blocks can never become unallocated again, as is the case with ext3
* at least).
*/
struct bitset_mapping* allocation_map;
/* when starting up, this thread builds the allocation_map */
pthread_t allocation_map_builder_thread;
/* when the thread has finished, it sets this to 1 */
volatile sig_atomic_t allocation_map_built;
int max_nbd_clients;
struct client_tbl_entry *nbd_client;