Build the allocation map in a background thread; guard the bitset with a mutex.

The allocation map is allocated (empty) by serve_init_allocation_map() and then
filled in by build_allocation_map_thread().  Client threads may set bits in it
at any time, but only consult it once allocation_map_built has been set.

Review fixes folded into this patch:
  * build_allocation_map() returns 1/0 as documented (it returned a pointer or
    NULL from an int function) and no longer frees a map it does not own.
  * build_allocation_map() resumes after the last reported extent when a window
    holds more extents than max_extents, instead of silently dropping them.
  * offset/max_length are uint64_t; max_extents is an enum constant.
  * The map is allocated before the builder thread starts, so clients calling
    bitset_set_range() never see a NULL allocation_map.
  * serve_init_allocation_map() keeps its close(fd).
  * NULLCHECK(params) runs before params is dereferenced.
  * pthread_* results are negated before FATAL_IF_NEGATIVE, because they are
    zero or a positive error number.

Reconstruction notes: this patch was recovered from a whitespace-collapsed
copy.  Blank context lines and indentation were rebuilt from the hunk line
counts and may not match the tree byte-for-byte.  The header names on the two
context "#include" lines in src/serve.h were lost and are left as found.
Index lines are omitted for files whose new content changed.

diff --git a/src/bitset.h b/src/bitset.h
--- a/src/bitset.h
+++ b/src/bitset.h
@@ -78,9 +78,11 @@ static inline int bit_run_count(char* b, int from, int len) {
 
 /** An application of a bitset - a bitset mapping represents a file of ''size''
   * broken down into ''resolution''-sized chunks.  The bit set is assumed to
-  * represent one bit per chunk.
+  * represent one bit per chunk.  We also bundle a lock so that the set can be
+  * written reliably by multiple threads.
   */
 struct bitset_mapping {
+	pthread_mutex_t lock;
 	uint64_t size;
 	int resolution;
 	char bits[];
@@ -100,6 +102,8 @@ static inline struct bitset_mapping* bitset_alloc(
 	);
 	bitset->size = size;
 	bitset->resolution = resolution;
+	/* don't actually need to call pthread_mutex_destroy */
+	pthread_mutex_init(&bitset->lock, NULL);
 	return bitset;
 }
 
@@ -108,6 +112,15 @@ static inline struct bitset_mapping* bitset_alloc(
 	last = (from+len-1)/set->resolution, \
 	bitlen = last-first+1
 
+/* pthread_mutex_lock() and pthread_mutex_unlock() return 0 or a positive error
+ * number, never a negative one, so negate the result before testing it. */
+#define BITSET_LOCK \
+	FATAL_IF_NEGATIVE(-pthread_mutex_lock(&set->lock), "Error locking bitset")
+
+#define BITSET_UNLOCK \
+	FATAL_IF_NEGATIVE(-pthread_mutex_unlock(&set->lock), "Error unlocking bitset")
+
+
 /** Set the bits in a bitset which correspond to the given bytes in the larger
   * file.
   */
@@ -117,7 +130,9 @@ static inline void bitset_set_range(
 	uint64_t len)
 {
 	INT_FIRST_AND_LAST;
+	BITSET_LOCK;
 	bit_set_range(set->bits, first, bitlen);
+	BITSET_UNLOCK;
 }
 
 
@@ -140,7 +155,9 @@ static inline void bitset_clear_range(
 	uint64_t len)
 {
 	INT_FIRST_AND_LAST;
+	BITSET_LOCK;
 	bit_clear_range(set->bits, first, bitlen);
+	BITSET_UNLOCK;
 }
 
 
@@ -163,10 +180,14 @@ static inline int bitset_run_count(
 {
 	/* now fix in case len goes past the end of the memory we have
 	 * control of */
+	int run;
 	len = len+from>set->size ? set->size-from : len;
 	INT_FIRST_AND_LAST;
-	return (bit_run_count(set->bits, first, bitlen) * set->resolution) -
+	BITSET_LOCK;
+	run = (bit_run_count(set->bits, first, bitlen) * set->resolution) -
 		(from % set->resolution);
+	BITSET_UNLOCK;
+	return run;
 }
 
 /** Tests whether the bit field is clear for the given file offset.
diff --git a/src/client.c b/src/client.c
--- a/src/client.c
+++ b/src/client.c
@@ -407,13 +407,11 @@ void client_reply_to_read( struct client* client, struct nbd_request request )
 void client_reply_to_write( struct client* client, struct nbd_request request )
 {
 	debug("request write %ld+%d", request.from, request.len);
-	if (client->serve->allocation_map) {
+	if (client->serve->allocation_map_built) {
 		write_not_zeroes( client, request.from, request.len );
 	}
 	else {
 		debug("No allocation map, writing directly.");
-		/* If we get cut off partway through reading this data
-		 * */
 		ERROR_IF_NEGATIVE(
 			readloop( client->socket,
 				client->mapped + request.from,
@@ -423,6 +421,14 @@ void client_reply_to_write( struct client* client, struct nbd_request request )
 				request.len
 		);
 		server_dirty(client->serve, request.from, request.len);
+		/* the allocation_map is shared between client threads, and may
+		 * still be being built (it is allocated before the builder thread
+		 * starts, so it is never NULL here).  Setting bits now accurately
+		 * reflects the block allocation we've just caused; the map is
+		 * never consulted until the builder thread has finished.
+		 */
+		bitset_set_range(client->serve->allocation_map,
+			request.from, request.len);
 	}
 
 	if (1) /* not sure whether this is necessary... */
diff --git a/src/ioutil.c b/src/ioutil.c
--- a/src/ioutil.c
+++ b/src/ioutil.c
@@ -11,83 +11,62 @@
 #include "util.h"
 #include "bitset.h"
 
-struct bitset_mapping* build_allocation_map(int fd, uint64_t size, int resolution)
-{
-	unsigned int i;
-	struct bitset_mapping* allocation_map = bitset_alloc(size, resolution);
-	struct fiemap *fiemap_count = NULL, *fiemap = NULL;
-
-	fiemap_count = (struct fiemap*) xmalloc(sizeof(struct fiemap));
-
-	fiemap_count->fm_start = 0;
-	fiemap_count->fm_length = size;
-	fiemap_count->fm_flags = FIEMAP_FLAG_SYNC;
-	fiemap_count->fm_extent_count = 0;
-	fiemap_count->fm_mapped_extents = 0;
-
-	/* Find out how many extents there are */
-	if (ioctl(fd, FS_IOC_FIEMAP, fiemap_count) < 0) {
-		debug( "Couldn't get fiemap_count, returning no allocation_map" );
-		goto no_map;
-	}
-
-	/* Resize fiemap to allow us to read in the extents */
-	fiemap = (struct fiemap*)xmalloc(
-		sizeof(struct fiemap) + (
-			sizeof(struct fiemap_extent) *
-			fiemap_count->fm_mapped_extents
-		)
-	);
-
-	/* realloc makes valgrind complain a lot */
-	memcpy(fiemap, fiemap_count, sizeof(struct fiemap));
-	free( fiemap_count );
-
-	fiemap->fm_extent_count = fiemap->fm_mapped_extents;
-	fiemap->fm_mapped_extents = 0;
-
-	if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) {
-		debug( "Couldn't get fiemap, returning no allocation_map" );
-		goto no_map;
-	}
-
-	for (i=0;i<fiemap->fm_mapped_extents;i++) {
-		bitset_set_range(
-			allocation_map,
-			fiemap->fm_extents[i].fe_logical,
-			fiemap->fm_extents[i].fe_length
-		);
-	}
-
-	/* This is pointlessly verbose for real discs, it's here as a
-	 * reference for pulling data out of the allocation map */
-	if ( 0 ) {
-		for (i=0; i<(size/resolution); i++) {
-			debug("map[%d] = %d%d%d%d%d%d%d%d",
-				i,
-				(allocation_map->bits[i] & 1) == 1,
-				(allocation_map->bits[i] & 2) == 2,
-				(allocation_map->bits[i] & 4) == 4,
-				(allocation_map->bits[i] & 8) == 8,
-				(allocation_map->bits[i] & 16) == 16,
-				(allocation_map->bits[i] & 32) == 32,
-				(allocation_map->bits[i] & 64) == 64,
-				(allocation_map->bits[i] & 128) == 128
-			);
-		}
-	}
-
-	free(fiemap);
-
-
-	debug("Successfully built allocation map");
-	return allocation_map;
-
-no_map:
-	free( allocation_map );
-	if ( NULL != fiemap ) { free( fiemap ); }
-	if ( NULL != fiemap_count ) { free( fiemap_count ); }
-	return NULL;
+int build_allocation_map(struct bitset_mapping* allocation_map, int fd)
+{
+	/* break blocking ioctls down */
+	const uint64_t max_length = 100*1024*1024;
+	/* an enum constant, so the array below is not a variable-length array */
+	enum { max_extents = 1000 };
+
+	uint64_t offset = 0;
+
+	struct {
+		struct fiemap fiemap;
+		struct fiemap_extent extents[max_extents];
+	} fiemap_static;
+	struct fiemap* fiemap = (struct fiemap*) &fiemap_static;
+
+	memset(&fiemap_static, 0, sizeof(fiemap_static));
+
+	while (offset < allocation_map->size) {
+		unsigned int i;
+
+		fiemap->fm_start = offset;
+		fiemap->fm_length = max_length;
+		if (offset + max_length > allocation_map->size)
+			fiemap->fm_length = allocation_map->size-offset;
+		fiemap->fm_flags = FIEMAP_FLAG_SYNC;
+		fiemap->fm_extent_count = max_extents;
+		fiemap->fm_mapped_extents = 0;
+
+		if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) {
+			debug( "Couldn't get fiemap, returning no allocation_map" );
+			/* the caller owns allocation_map and frees it; don't free it here */
+			return 0;
+		}
+
+		for (i=0;i<fiemap->fm_mapped_extents;i++) {
+			bitset_set_range(
+				allocation_map,
+				fiemap->fm_extents[i].fe_logical,
+				fiemap->fm_extents[i].fe_length
+			);
+		}
+
+		/* If every slot was filled there may be more extents in this
+		 * window than we had room for: resume just past the last one
+		 * reported instead of skipping to the next window. */
+		if (fiemap->fm_mapped_extents == max_extents) {
+			offset = fiemap->fm_extents[max_extents-1].fe_logical +
+				fiemap->fm_extents[max_extents-1].fe_length;
+		}
+		else {
+			offset += fiemap->fm_length;
+		}
+	}
+
+	debug("Successfully built allocation map");
+	return 1;
 }
 
 
diff --git a/src/ioutil.h b/src/ioutil.h
--- a/src/ioutil.h
+++ b/src/ioutil.h
@@ -4,13 +4,14 @@
 #include "serve.h"
 struct bitset_mapping; /* don't need whole of bitset.h here */
 
-/** Returns a bit field representing which blocks are allocated in file
-  * descriptor ''fd''.  You must supply the size, and the resolution at which
-  * you want the bits to represent allocated blocks.  If the OS represents
-  * allocated blocks at a finer resolution than you've asked for, any block
-  * or part block will count as "allocated" with the corresponding bit set.
+/** Scan the file opened in ''fd'', set bits in ''allocation_map'' that
+  * correspond to which blocks are physically allocated on disc (or part-
+  * allocated).  If the OS represents allocated blocks at a finer resolution
+  * than you've asked for, any block or part block will count as "allocated"
+  * with the corresponding bit set.  Returns 1 if successful, 0 otherwise.
+  * The caller keeps ownership of ''allocation_map'' in both cases.
   */
-struct bitset_mapping* build_allocation_map(int fd, off64_t size, int resolution);
+int build_allocation_map(struct bitset_mapping* allocation_map, int fd);
 
 /** Repeat a write() operation that succeeds partially until ''size'' bytes
   * are written, or an error is returned, when it returns -1 as usual.
diff --git a/src/serve.c b/src/serve.c
--- a/src/serve.c
+++ b/src/serve.c
@@ -757,6 +757,29 @@ void serve_accept_loop(struct server* params)
 	while( server_accept( params ) );
 }
 
+/** Thread entry point: fills in the already-allocated allocation map for
+  * params->filename and sets allocation_map_built once it is complete.
+  */
+void* build_allocation_map_thread(void* params1)
+{
+	struct server* params = (struct server*) params1;
+	int fd;
+
+	/* check the pointer before the first dereference */
+	NULLCHECK(params);
+
+	fd = open(params->filename, O_RDONLY);
+	FATAL_IF_NEGATIVE(fd, "Couldn't open %s", params->filename);
+
+	if (build_allocation_map(params->allocation_map, fd))
+		params->allocation_map_built = 1;
+	else
+		warn("Didn't build allocation map for %s", params->filename);
+
+	close(fd);
+	return NULL;
+}
+
 /** Initialisation function that sets up the initial allocation map, i.e. so
   * we know which blocks of the file are allocated.
   */
@@ -772,9 +795,15 @@ void serve_init_allocation_map(struct server* params)
 	params->size = size;
 	FATAL_IF_NEGATIVE(size, "Couldn't find size of %s",
 		params->filename);
-	params->allocation_map =
-		build_allocation_map(fd, size, block_allocation_resolution);
+	/* Allocate the (empty) map here, before the builder thread exists, so
+	 * that client threads calling bitset_set_range() never see NULL. */
+	params->allocation_map =
+		bitset_alloc(size, block_allocation_resolution);
 	close(fd);
+	/* pthread_create() returns a positive error number on failure: negate it */
+	FATAL_IF_NEGATIVE(-pthread_create(&params->allocation_map_builder_thread,
+		NULL, build_allocation_map_thread, params),
+		"Couldn't create thread");
 }
 
 
@@ -818,9 +847,13 @@ void serve_cleanup(struct server* params,
 	info("cleaning up");
 
 	int i;
+	void* status;
 
 	if (params->server_fd){ close(params->server_fd); }
 
+	/* need to stop background build if we're killed very early on */
+	pthread_cancel(params->allocation_map_builder_thread);
+	pthread_join(params->allocation_map_builder_thread, &status);
 	if (params->allocation_map) {
 		free(params->allocation_map);
 	}
@@ -839,7 +872,6 @@ void serve_cleanup(struct server* params,
 
 
 	for (i=0; i < params->max_nbd_clients; i++) {
-		void* status;
 		pthread_t thread_id = params->nbd_client[i].thread;
 
 		if (thread_id != 0) {
diff --git a/src/serve.h b/src/serve.h
index 02c7021..1132f99 100644
--- a/src/serve.h
+++ b/src/serve.h
@@ -3,6 +3,7 @@
 
 #include
 #include
+#include <signal.h> /* for sig_atomic_t */
 
 #include "flexnbd.h"
 #include "parse.h"
@@ -66,7 +67,22 @@ struct server {
 	int server_fd;
 	int control_fd;
 
+	/* the allocation_map keeps track of which blocks in the backing file
+	 * have been allocated, or part-allocated on disc, with unallocated
+	 * blocks presumed to contain zeroes (i.e. represented as sparse files
+	 * by the filesystem).  We can use this information when receiving
+	 * incoming writes, and avoid writing zeroes to unallocated sections
+	 * of the file which would needlessly increase disc usage.  This
+	 * bitmap will start at all-zeroes for an empty file, and tend towards
+	 * all-ones as the file is written to (i.e. we assume that allocated
+	 * blocks can never become unallocated again, as is the case with ext3
+	 * at least).
+	 */
 	struct bitset_mapping* allocation_map;
+	/* when starting up, this thread builds the allocation_map */
+	pthread_t allocation_map_builder_thread;
+	/* when the thread has finished, it sets this to 1 */
+	volatile sig_atomic_t allocation_map_built;
 
 	int max_nbd_clients;
 	struct client_tbl_entry *nbd_client;