Stopped NBD writes from committing all-zero blocks to disc (tentative, needs
further testing).
This commit is contained in:
40
bitset.h
40
bitset.h
@@ -1,19 +1,25 @@
|
|||||||
#ifndef __BITSET_H
|
#ifndef __BITSET_H
|
||||||
#define __BITSET_H
|
#define __BITSET_H
|
||||||
|
|
||||||
|
#include <inttypes.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
static inline char char_with_bit_set(int num) {
|
static inline char char_with_bit_set(int num) { return 1<<(num%8); }
|
||||||
return 1<<(num%8);
|
|
||||||
}
|
|
||||||
static inline int bit_is_set(char* b, int idx) {
|
static inline int bit_is_set(char* b, int idx) {
|
||||||
return (b[idx/8] & char_with_bit_set(idx)) != 0;
|
return (b[idx/8] & char_with_bit_set(idx)) != 0;
|
||||||
}
|
}
|
||||||
static inline int bit_is_clear(char* b, int idx) {
|
static inline int bit_is_clear(char* b, int idx) {
|
||||||
return !bit_is_set(b, idx);
|
return !bit_is_set(b, idx);
|
||||||
}
|
}
|
||||||
|
static inline int bit_has_value(char* b, int idx, int value) {
|
||||||
|
if (value)
|
||||||
|
return bit_is_set(b, idx);
|
||||||
|
else
|
||||||
|
return bit_is_clear(b, idx);
|
||||||
|
}
|
||||||
static inline void bit_set(char* b, int idx) {
|
static inline void bit_set(char* b, int idx) {
|
||||||
b[idx/8] &= char_with_bit_set(idx);
|
b[idx/8] |= char_with_bit_set(idx);
|
||||||
}
|
}
|
||||||
static inline void bit_clear(char* b, int idx) {
|
static inline void bit_clear(char* b, int idx) {
|
||||||
b[idx/8] &= ~char_with_bit_set(idx);
|
b[idx/8] &= ~char_with_bit_set(idx);
|
||||||
@@ -35,5 +41,31 @@ static inline void bit_clear_range(char* b, int from, int len) {
|
|||||||
bit_clear(b, from++);
|
bit_clear(b, from++);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int bit_run_count(char* b, int from, int len) {
|
||||||
|
int count;
|
||||||
|
int first_value = bit_is_set(b, from);
|
||||||
|
|
||||||
|
for (count=0; len > 0 && bit_has_value(b, from+count, first_value); count++, len--)
|
||||||
|
;
|
||||||
|
|
||||||
|
/* FIXME: debug this later */
|
||||||
|
/*for (; (from+count) % 64 != 0 && len > 0; len--)
|
||||||
|
if (bit_has_value(b, from+count, first_value))
|
||||||
|
count++;
|
||||||
|
else
|
||||||
|
return count;
|
||||||
|
for (; len >= 64; len-=64) {
|
||||||
|
if (*((uint64_t*)(b + ((from+count)/8))) == UINT64_MAX)
|
||||||
|
count += 64;
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
for (; len > 0; len--)
|
||||||
|
if (bit_is_set(b, from+count))
|
||||||
|
count++;*/
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@@ -18,7 +18,6 @@ void syntax()
|
|||||||
" flexnbd read <IP address> <port> <offset> <length> > data\n"
|
" flexnbd read <IP address> <port> <offset> <length> > data\n"
|
||||||
" flexnbd write <IP address> <port> <offset> <length> < data\n"
|
" flexnbd write <IP address> <port> <offset> <length> < data\n"
|
||||||
" flexnbd write <IP address> <port> <offset> <data file>\n"
|
" flexnbd write <IP address> <port> <offset> <data file>\n"
|
||||||
" flexnbd mirror <IP address> <port> <target IP> <target port>\n"
|
|
||||||
);
|
);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
27
ioutil.c
27
ioutil.c
@@ -47,12 +47,29 @@ char* build_allocation_map(int fd, off64_t size, int resolution)
|
|||||||
if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < -1)
|
if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < -1)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
for (i=0;i<fiemap->fm_mapped_extents;i++)
|
for (i=0;i<fiemap->fm_mapped_extents;i++) {
|
||||||
bit_set_range(
|
int first_bit = fiemap->fm_extents[i].fe_logical / resolution;
|
||||||
allocation_map,
|
int last_bit = (fiemap->fm_extents[i].fe_logical +
|
||||||
fiemap->fm_extents[i].fe_logical / resolution,
|
fiemap->fm_extents[i].fe_length + resolution - 1) /
|
||||||
fiemap->fm_extents[i].fe_length / resolution
|
resolution;
|
||||||
|
int run = last_bit - first_bit;
|
||||||
|
|
||||||
|
bit_set_range(allocation_map, first_bit, run);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i=0; i<16; i++) {
|
||||||
|
debug("map[%d] = %d%d%d%d%d%d%d%d",
|
||||||
|
i,
|
||||||
|
(allocation_map[i] & 1) == 1,
|
||||||
|
(allocation_map[i] & 2) == 2,
|
||||||
|
(allocation_map[i] & 4) == 4,
|
||||||
|
(allocation_map[i] & 8) == 8,
|
||||||
|
(allocation_map[i] & 16) == 16,
|
||||||
|
(allocation_map[i] & 32) == 32,
|
||||||
|
(allocation_map[i] & 64) == 64,
|
||||||
|
(allocation_map[i] & 128) == 128
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
free(fiemap);
|
free(fiemap);
|
||||||
|
|
||||||
|
2
params.h
2
params.h
@@ -31,7 +31,6 @@ struct mode_serve_params {
|
|||||||
int server;
|
int server;
|
||||||
int threads;
|
int threads;
|
||||||
|
|
||||||
pthread_mutex_t block_allocation_map_lock;
|
|
||||||
char* block_allocation_map;
|
char* block_allocation_map;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -51,7 +50,6 @@ struct client_params {
|
|||||||
off64_t size;
|
off64_t size;
|
||||||
char* mapped;
|
char* mapped;
|
||||||
|
|
||||||
pthread_mutex_t block_allocation_map_lock;
|
|
||||||
char* block_allocation_map;
|
char* block_allocation_map;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
164
serve.c
164
serve.c
@@ -2,6 +2,7 @@
|
|||||||
#include "nbdtypes.h"
|
#include "nbdtypes.h"
|
||||||
#include "ioutil.h"
|
#include "ioutil.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
#include "bitset.h"
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
@@ -12,12 +13,114 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
|
static const int block_allocation_resolution = 4096;//128<<10;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* So waiting on client->socket is len bytes of data, and we must write it all
|
||||||
|
* to client->mapped. However while doing do we must consult the bitmap
|
||||||
|
* client->block_allocation_map, which is a bitmap where one bit represents
|
||||||
|
* block_allocation_resolution bytes. Where a bit isn't set, there are no
|
||||||
|
* disc blocks allocated for that portion of the file, and we'd like to keep
|
||||||
|
* it that way.
|
||||||
|
*
|
||||||
|
* If the bitmap shows that every block in our prospective write is already
|
||||||
|
* allocated, we can proceed as normal and make one call to writeloop.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void write_not_zeroes(struct client_params* client, off64_t from, int len)
|
||||||
|
{
|
||||||
|
char *map = client->block_allocation_map;
|
||||||
|
|
||||||
|
while (len > 0) {
|
||||||
|
/* so we have to calculate how much of our input to consider
|
||||||
|
* next based on the bitmap of allocated blocks. This will be
|
||||||
|
* at a coarser resolution than the actual write, which may
|
||||||
|
* not fall on a block boundary at either end. So we look up
|
||||||
|
* how many blocks our write covers, then cut off the start
|
||||||
|
* and end to get the exact number of bytes.
|
||||||
|
*/
|
||||||
|
int first_bit = from/block_allocation_resolution;
|
||||||
|
int last_bit = (from+len+block_allocation_resolution-1) /
|
||||||
|
block_allocation_resolution;
|
||||||
|
int run = bit_run_count(map, first_bit, last_bit-first_bit) *
|
||||||
|
block_allocation_resolution;
|
||||||
|
|
||||||
|
debug("write_not_zeroes: %ld+%d, first_bit=%d, last_bit=%d, run=%d",
|
||||||
|
from, len, first_bit, last_bit, run);
|
||||||
|
|
||||||
|
run -= from % block_allocation_resolution; /*start*/
|
||||||
|
|
||||||
|
if (first_bit == last_bit)
|
||||||
|
run -= block_allocation_resolution -
|
||||||
|
((from+len) % block_allocation_resolution); /*end*/
|
||||||
|
|
||||||
|
debug("run adjusted to %d", run);
|
||||||
|
|
||||||
|
#define DO_READ(dst, len) CLIENT_ERROR_ON_FAILURE( \
|
||||||
|
readloop( \
|
||||||
|
client->socket, \
|
||||||
|
(dst), \
|
||||||
|
(len) \
|
||||||
|
), \
|
||||||
|
"read failed %ld+%d", from, (len) \
|
||||||
|
)
|
||||||
|
|
||||||
|
if (bit_is_set(map, from/8)) {
|
||||||
|
/*debug("writing the lot");*/
|
||||||
|
/* already allocated, just write it all */
|
||||||
|
DO_READ(client->mapped + from, run);
|
||||||
|
len -= run;
|
||||||
|
from += run;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
char zerobuffer[block_allocation_resolution];
|
||||||
|
/* not allocated, read in block_allocation_resoution */
|
||||||
|
while (run > 0) {
|
||||||
|
char *dst = client->mapped+from;
|
||||||
|
int bit = from/block_allocation_resolution;
|
||||||
|
int blockrun = block_allocation_resolution -
|
||||||
|
(from % block_allocation_resolution);
|
||||||
|
if (blockrun > run)
|
||||||
|
blockrun = run;
|
||||||
|
|
||||||
|
/*debug("writing partial: bit=%d, blockrun=%d",
|
||||||
|
bit, blockrun);*/
|
||||||
|
|
||||||
|
DO_READ(zerobuffer, blockrun);
|
||||||
|
|
||||||
|
/* This reads the buffer twice in the worst case
|
||||||
|
* but we're leaning on memcmp failing early
|
||||||
|
* and memcpy being fast, rather than try to
|
||||||
|
* hand-optimized something specific.
|
||||||
|
*/
|
||||||
|
if (zerobuffer[0] != 0 ||
|
||||||
|
memcmp(zerobuffer, zerobuffer + 1, blockrun)) {
|
||||||
|
memcpy(dst, zerobuffer, blockrun);
|
||||||
|
bit_set(map, bit);
|
||||||
|
/*debug("non-zero, copied and set bit %d", bit);*/
|
||||||
|
/* at this point we could choose to
|
||||||
|
* short-cut the rest of the write for
|
||||||
|
* faster I/O but by continuing to do it
|
||||||
|
* the slow way we preserve as much
|
||||||
|
* sparseness as possible.
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/*debug("all zero, skip write");*/
|
||||||
|
}
|
||||||
|
len -= blockrun;
|
||||||
|
run -= blockrun;
|
||||||
|
from += blockrun;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int client_serve_request(struct client_params* client)
|
int client_serve_request(struct client_params* client)
|
||||||
{
|
{
|
||||||
off64_t offset;
|
off64_t offset;
|
||||||
struct nbd_request request;
|
struct nbd_request request;
|
||||||
struct nbd_reply reply;
|
struct nbd_reply reply;
|
||||||
// struct unallocated_block** unallocated;
|
|
||||||
|
|
||||||
if (readloop(client->socket, &request, sizeof(request)) == -1) {
|
if (readloop(client->socket, &request, sizeof(request)) == -1) {
|
||||||
if (errno == 0) {
|
if (errno == 0) {
|
||||||
@@ -85,32 +188,25 @@ int client_serve_request(struct client_params* client)
|
|||||||
|
|
||||||
case REQUEST_WRITE:
|
case REQUEST_WRITE:
|
||||||
debug("request write %ld+%d", be64toh(request.from), be32toh(request.len));
|
debug("request write %ld+%d", be64toh(request.from), be32toh(request.len));
|
||||||
#ifdef _LINUX_FIEMAP_H
|
if (client->block_allocation_map) {
|
||||||
unallocated = read_unallocated_blocks(
|
write_not_zeroes(
|
||||||
client->fileno,
|
client,
|
||||||
be64toh(request.from),
|
be64toh(request.from),
|
||||||
be32toh(request.len)
|
|
||||||
);
|
|
||||||
if (unallocated == NULL)
|
|
||||||
CLIENT_ERROR("Couldn't read unallocated blocks list");
|
|
||||||
|
|
||||||
CLIENT_ERROR_ON_FAILURE(
|
|
||||||
read_from_socket_avoiding_holes(
|
|
||||||
client->socket,
|
|
||||||
);
|
|
||||||
free(fiemap);
|
|
||||||
#else
|
|
||||||
CLIENT_ERROR_ON_FAILURE(
|
|
||||||
readloop(
|
|
||||||
client->socket,
|
|
||||||
client->mapped + be64toh(request.from),
|
|
||||||
be32toh(request.len)
|
be32toh(request.len)
|
||||||
),
|
);
|
||||||
"read failed from=%ld, len=%d",
|
}
|
||||||
be64toh(request.from),
|
else {
|
||||||
be32toh(request.len)
|
CLIENT_ERROR_ON_FAILURE(
|
||||||
);
|
readloop(
|
||||||
#endif
|
client->socket,
|
||||||
|
client->mapped + be64toh(request.from),
|
||||||
|
be32toh(request.len)
|
||||||
|
),
|
||||||
|
"read failed from=%ld, len=%d",
|
||||||
|
be64toh(request.from),
|
||||||
|
be32toh(request.len)
|
||||||
|
);
|
||||||
|
}
|
||||||
write(client->socket, &reply, sizeof(reply));
|
write(client->socket, &reply, sizeof(reply));
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@@ -263,8 +359,6 @@ void serve_accept_loop(struct mode_serve_params* params)
|
|||||||
client_params->filename = params->filename;
|
client_params->filename = params->filename;
|
||||||
client_params->block_allocation_map =
|
client_params->block_allocation_map =
|
||||||
params->block_allocation_map;
|
params->block_allocation_map;
|
||||||
client_params->block_allocation_map_lock =
|
|
||||||
params->block_allocation_map_lock;
|
|
||||||
|
|
||||||
client_thread = pthread_create(&client_thread, NULL,
|
client_thread = pthread_create(&client_thread, NULL,
|
||||||
client_serve, client_params);
|
client_serve, client_params);
|
||||||
@@ -275,9 +369,23 @@ void serve_accept_loop(struct mode_serve_params* params)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void serve_init_allocation_map(struct mode_serve_params* params)
|
||||||
|
{
|
||||||
|
int fd = open(params->filename, O_RDONLY);
|
||||||
|
off64_t size;
|
||||||
|
SERVER_ERROR_ON_FAILURE(fd, "Couldn't open %s", params->filename);
|
||||||
|
size = lseek64(fd, 0, SEEK_END);
|
||||||
|
SERVER_ERROR_ON_FAILURE(size, "Couldn't find size of %s",
|
||||||
|
params->filename);
|
||||||
|
params->block_allocation_map =
|
||||||
|
build_allocation_map(fd, size, block_allocation_resolution);
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
|
||||||
void do_serve(struct mode_serve_params* params)
|
void do_serve(struct mode_serve_params* params)
|
||||||
{
|
{
|
||||||
serve_open_socket(params);
|
serve_open_socket(params);
|
||||||
|
serve_init_allocation_map(params);
|
||||||
serve_accept_loop(params);
|
serve_accept_loop(params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user