21 Commits

Author SHA1 Message Date
Patrick J Cherry
29306a716e Changed msync to actually sync 2018-01-31 23:03:28 +00:00
Patrick J Cherry
0ba90b39a3 Change from one giant mmap to lots of individual mmaps
This copies xnbd's style
2018-01-31 22:56:59 +00:00
Patrick J Cherry
7f98f6ef9e Add compiler flag to disable explicit msync() calls 2018-01-31 20:45:48 +00:00
Patrick J Cherry
f2fa00260b Merge branch 'avoid-crash-on-timeout' into 'develop'
avoid fatal error on client connection timeout

See merge request open-source/flexnbd-c!36
2018-01-26 16:04:51 +00:00
James F. Carter
b2007c9dad debian: uodate changelog 2018-01-26 15:06:26 +00:00
James F. Carter
9b1781164a avoid fatal error on client connection timeout 2018-01-26 15:03:44 +00:00
Ian Chilton
1f99929589 Merge branch 'develop' into 'develop'
Develop

See merge request open-source/flexnbd-c!35
2018-01-24 12:42:49 +00:00
Chris Cottam
c37627a5b9 not high enough, trying 32MB 2018-01-18 17:08:32 +00:00
Chris Cottam
ceb3328261 increasing the NBD max size to see if it fixes an issue with qemu-2.11.0 2018-01-18 16:52:24 +00:00
Patrick J Cherry
61940bdfc5 Merge branch '34-logging-should-include-the-id-of-the-disc-that-is-being-served' into 'develop'
add a log_context, a string output as part of any log message

Closes #34

See merge request open-source/flexnbd-c!34
2018-01-11 10:35:45 +00:00
James F. Carter
6d96d751d8 debian: update changelog 2018-01-11 10:06:03 +00:00
James F. Carter
fa75de0a8b proxy sets the upstream address and port as its log context 2018-01-11 10:04:18 +00:00
James F. Carter
1cb11bfd38 serve sets the disc's backing file as its log context 2018-01-11 10:03:16 +00:00
James F. Carter
2702e73a26 add a log_context, a string output as part of any log message 2018-01-11 10:01:42 +00:00
Patrick J Cherry
dbf50046a8 Merge branch '33-tcp-keepalive-should-be-applied-to-connection-so-that-dead-connections-can-be-properly-reaped' into 'develop'
apply tcp keepalive to serving sockets

Closes #33

See merge request open-source/flexnbd-c!33
2018-01-10 17:51:02 +00:00
James F. Carter
d62b069ce4 debian: update changelog 2018-01-10 13:58:11 +00:00
James F. Carter
884a714744 whitespace fix 2018-01-10 13:55:05 +00:00
James F. Carter
0c668f1776 remember how || works in C 2018-01-10 13:54:26 +00:00
James F. Carter
1d5b315f17 apply tcp keepalive to serving sockets 2018-01-10 13:49:22 +00:00
Patrick J Cherry
24f1e62a73 Merge branch 'release' into 'develop'
Merge changelog back to develop

See merge request !32
2017-07-14 17:41:51 +01:00
Chris Elsworth
5c37cba39b New release 2017-07-14 17:03:56 +01:00
14 changed files with 149 additions and 33 deletions

View File

@@ -4,7 +4,7 @@ VPATH=src:tests/unit
DESTDIR?=/
PREFIX?=/usr/local/bin
INSTALLDIR=$(DESTDIR)/$(PREFIX)
ifdef DEBUG
CFLAGS_EXTRA=-g -DDEBUG
LDFLAGS_EXTRA=-g
@@ -12,6 +12,11 @@ else
CFLAGS_EXTRA=-O2
endif
NO_MSYNC=1
ifdef NO_MSYNC
CFLAGS_EXTRA += -DNO_MSYNC
endif
CFLAGS_EXTRA += -fPIC --std=gnu99
LDFLAGS_EXTRA += -Wl,--relax,--gc-sections -L$(LIB) -Wl,-rpath-link,$(LIB)

14
debian/changelog vendored
View File

@@ -1,3 +1,17 @@
flexnbd (0.1.8) UNRELEASED; urgency=medium
* Set TCP keepalive on sockets so broken connections are reaped (#33, !33,
!36)
* Add a context to logs to make debugging problems easier (#34, !34)
-- James Carter <james.carter@bytemark.co.uk> Thu, 11 Jan 2018 10:05:35 +0000
flexnbd (0.1.7) stable; urgency=medium
* Return bytes_left in migration statistics.
-- Chris Elsworth <chris.elsworth@bytemark.co.uk> Fri, 14 Jul 2017 17:00:38 +0100
flexnbd (0.1.6) stable; urgency=medium
* Remove lots of per-cpu compiler flags, notably march=native.

View File

@@ -73,7 +73,6 @@ int build_allocation_map(struct bitset * allocation_map, int fd)
return 1;
}
int open_and_mmap(const char* filename, int* out_fd, uint64_t *out_size, void **out_map)
{
/*
@@ -101,7 +100,7 @@ int open_and_mmap(const char* filename, int* out_fd, uint64_t *out_size, void **
*out_size = size;
}
if (out_map) {
if (0) {
*out_map = mmap64(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED,
*out_fd, 0);
if (((long) *out_map) == -1) {
@@ -349,3 +348,36 @@ ssize_t iobuf_write( int fd, struct iobuf *iobuf )
return count;
}
/** Map the byte range starting at ''from'' and ''len'' bytes long of the file
 * open on ''fd'' into memory, shared and read/write.
 *
 * The file offset handed to mmap64 is ''from'' rounded down to a page
 * boundary, and the mapping is lengthened by the same amount so that it
 * still covers the end of the requested range. In the returned iommap,
 * ''buf'' points at the byte that corresponds to ''from'' inside the mapping.
 *
 * The descriptor is allocated with xmalloc; release it (and the mapping)
 * with iommap_free(). When the computed mapping length is zero, nothing is
 * mapped and ''mmap_buf'' stays NULL.
 */
struct iommap *iommap_alloc(int fd, off64_t from, uint64_t len)
{
	/* Clear the low bits of the offset to land on the start of its page. */
	off64_t mmap_from = from & ~((off64_t) getpagesize() - 1);
	uint64_t mmap_len = len + (from - mmap_from);
	void *mmap_buf = NULL;

	/* TODO: Check the error code from mmap64. A MAP_FAILED result is
	 * currently stored and used as if it were a valid mapping. */
	if (mmap_len) {
		mmap_buf = mmap64(NULL, mmap_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, mmap_from);
	}

	struct iommap *im = xmalloc(sizeof *im);
	im->mmap_buf = mmap_buf;
	im->mmap_len = mmap_len;
	/* Step back over the bytes added by rounding down to the page start. */
	im->buf = (char *) mmap_buf + from - mmap_from;

	debug("mmapped file from %" PRId64 ":%" PRIu64, (int64_t) mmap_from, mmap_len);

	return im;
}
/** Flush the mapped range described by ''im'' using
 * msync(MS_SYNC | MS_INVALIDATE). Does nothing when the mapping length is
 * zero. The result of msync is not inspected.
 */
void iommap_sync(struct iommap *im)
{
	if (im->mmap_len == 0) {
		return;
	}

	msync(im->mmap_buf, im->mmap_len, MS_SYNC | MS_INVALIDATE);
}
/** Release ''im'': unmap its range when the mapping length is non-zero, then
 * free the descriptor itself. The result of munmap is not inspected.
 */
void iommap_free(struct iommap *im)
{
	const int has_mapping = (im->mmap_len != 0);

	if (has_mapping) {
		munmap(im->mmap_buf, im->mmap_len);
	}

	free(im);
}

View File

@@ -11,7 +11,20 @@ struct iobuf {
ssize_t iobuf_read( int fd, struct iobuf* iobuf, size_t default_size );
ssize_t iobuf_write( int fd, struct iobuf* iobuf );
#include <inttypes.h>
/** A memory-mapped window onto part of a file, as filled in by
 * iommap_alloc() and torn down by iommap_free().
 */
struct iommap {
/* address inside the mapping of the file offset the caller asked for
 * (mmap_buf plus the distance from the mapping's start to that offset) */
char *buf;
/* value returned by mmap64, or NULL when nothing was mapped */
char *mmap_buf;
/* length in bytes passed to mmap64, msync and munmap; 0 means no mapping */
uint64_t mmap_len;
};
/** Map ''len'' bytes of ''fd'' starting at offset ''from''; the mapping
 * itself begins at the page boundary at or below ''from''. Returns a newly
 * allocated iommap to be released with iommap_free().
 */
struct iommap *iommap_alloc(int fd, off64_t from, uint64_t len);
/** msync() the mapped range of ''im'' (no-op for a zero-length mapping). */
void iommap_sync(struct iommap *im);
/** munmap() the mapped range of ''im'', if any, and free ''im''. */
void iommap_free(struct iommap *im);
#include "serve.h"
struct bitset; /* don't need whole of bitset.h here */
/** Scan the file opened in ''fd'', set bits in ''allocation_map'' that

View File

@@ -16,7 +16,7 @@
/* 1MiB is the de-facto standard for maximum size of header + data; this is
 * raised to 32MiB to see if it fixes an issue with qemu-2.11.0 */
#define NBD_MAX_SIZE ( 1024 * 1024 )
#define NBD_MAX_SIZE ( 32 * 1024 * 1024 )
#define NBD_REQUEST_SIZE ( sizeof( struct nbd_request_raw ) )
#define NBD_REPLY_SIZE ( sizeof( struct nbd_reply_raw ) )

View File

@@ -68,6 +68,37 @@ int sock_set_reuseaddr( int fd, int optval )
return setsockopt( fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval) );
}
/* Enable keepalive on ''fd'' and apply the idle time, probe interval and
 * probe count, in that order. The first setter to report failure ends the
 * sequence. Returns 0 if all four calls succeed, -1 otherwise. */
int sock_set_keepalive_params( int fd, int time, int intvl, int probes)
{
	if ( sock_set_keepalive( fd, 1 ) ) {
		return -1;
	}
	if ( sock_set_tcp_keepidle( fd, time ) ) {
		return -1;
	}
	if ( sock_set_tcp_keepintvl( fd, intvl ) ) {
		return -1;
	}
	if ( sock_set_tcp_keepcnt( fd, probes ) ) {
		return -1;
	}

	return 0;
}
/* Set SO_KEEPALIVE on ''fd'' to ''optval''; returns setsockopt's result. */
int sock_set_keepalive( int fd, int optval )
{
	const socklen_t optlen = sizeof( optval );

	return setsockopt( fd, SOL_SOCKET, SO_KEEPALIVE, &optval, optlen );
}
/* Set TCP_KEEPIDLE on ''fd'' to ''optval''; returns setsockopt's result. */
int sock_set_tcp_keepidle( int fd, int optval )
{
	const socklen_t optlen = sizeof( optval );

	return setsockopt( fd, IPPROTO_TCP, TCP_KEEPIDLE, &optval, optlen );
}
/* Set TCP_KEEPINTVL on ''fd'' to ''optval''; returns setsockopt's result. */
int sock_set_tcp_keepintvl( int fd, int optval )
{
	const socklen_t optlen = sizeof( optval );

	return setsockopt( fd, IPPROTO_TCP, TCP_KEEPINTVL, &optval, optlen );
}
/* Set TCP_KEEPCNT on ''fd'' to ''optval''; returns setsockopt's result. */
int sock_set_tcp_keepcnt( int fd, int optval )
{
	const socklen_t optlen = sizeof( optval );

	return setsockopt( fd, IPPROTO_TCP, TCP_KEEPCNT, &optval, optlen );
}
/* Set the tcp_nodelay option */
int sock_set_tcp_nodelay( int fd, int optval )
{

View File

@@ -14,9 +14,24 @@ size_t sockaddr_size(const struct sockaddr* sa);
*/
const char* sockaddr_address_string(const struct sockaddr* sa, char* dest, size_t len);
/* Configure TCP keepalive on a socket */
int sock_set_keepalive_params( int fd, int time, int intvl, int probes);
/* Set the SO_KEEPALIVE option */
int sock_set_keepalive(int fd, int optval);
/* Set the SO_REUSEADDR option */
int sock_set_reuseaddr(int fd, int optval);
/* Set the tcp_keepidle option */
int sock_set_tcp_keepidle(int fd, int optval);
/* Set the tcp_keepintvl option */
int sock_set_tcp_keepintvl(int fd, int optval);
/* Set the tcp_keepcnt option */
int sock_set_tcp_keepcnt(int fd, int optval);
/* Set the tcp_nodelay option */
int sock_set_tcp_nodelay(int fd, int optval);

View File

@@ -13,6 +13,7 @@
/* pthread key for the cleanup handler.
 * NOTE(review): presumably holds the per-thread handler installed through
 * error_set_handler() - confirm against the rest of this file. */
pthread_key_t cleanup_handler_key;
/* Process-wide log level; 2 is the level levstr() prints as 'W'.
 * NOTE(review): presumably the minimum level that is emitted - confirm
 * against mylog(). */
int log_level = 2;
/* Context string that myloglev() inserts into every log line. Empty by
 * default; server_create() and proxy_create() point it at something more
 * useful. */
char *log_context = "";
void error_init(void)
{

View File

@@ -21,6 +21,9 @@ extern int log_level;
/* set up the error globals */
void error_init(void);
/* some context for the overall process that appears on each log line */
extern char *log_context;
void exit_err( const char * );
@@ -92,7 +95,7 @@ uint64_t monotonic_time_ms(void);
#define levstr(i) (i==0?'D':(i==1?'I':(i==2?'W':(i==3?'E':'F'))))
#define myloglev(level, msg, ...) mylog( level, "%"PRIu64":%c:%d %p %s:%d: "msg"\n", monotonic_time_ms(), levstr(level), getpid(),pthread_self(), __FILE__, __LINE__, ##__VA_ARGS__ )
#define myloglev(level, msg, ...) mylog( level, "%"PRIu64":%c:%d %p %s %s:%d: "msg"\n", monotonic_time_ms(), levstr(level), getpid(),pthread_self(), log_context, __FILE__, __LINE__, ##__VA_ARGS__ )
#ifdef DEBUG
# define debug(msg, ...) myloglev(0, msg, ##__VA_ARGS__)

View File

@@ -79,6 +79,9 @@ struct proxier* proxy_create(
out->req.buf = xmalloc( NBD_MAX_SIZE );
out->rsp.buf = xmalloc( NBD_MAX_SIZE );
log_context = xmalloc( strlen(s_upstream_address) + strlen(s_upstream_port) + 2 );
sprintf(log_context, "%s:%s", s_upstream_address, s_upstream_port);
return out;
}

View File

@@ -118,6 +118,7 @@ void write_not_zeroes(struct client* client, uint64_t from, uint64_t len)
*/
uint64_t run = bitset_run_count(map, from, len);
struct iommap *iommap = iommap_alloc(client->fileno, from, len);
debug("write_not_zeroes: from=%ld, len=%d, run=%d", from, len, run);
@@ -155,7 +156,7 @@ void write_not_zeroes(struct client* client, uint64_t from, uint64_t len)
if (bitset_is_set_at(map, from)) {
debug("writing the lot: from=%ld, run=%d", from, run);
/* already allocated, just write it all */
DO_READ(client->mapped + from, run);
DO_READ(iommap->buf, run);
/* We know from our earlier call to bitset_run_count that the
* bitset is all-1s at this point, but we need to dirty it for the
* sake of the event stream - the actual bytes have changed, and we
@@ -186,7 +187,7 @@ void write_not_zeroes(struct client* client, uint64_t from, uint64_t len)
(0 == memcmp( zerobuffer, zerobuffer+1, blockrun-1 ));
if ( !all_zeros ) {
memcpy(client->mapped+from, zerobuffer, blockrun);
memcpy(iommap->buf, zerobuffer, blockrun);
bitset_set_range(map, from, blockrun);
/* at this point we could choose to
* short-cut the rest of the write for
@@ -205,6 +206,8 @@ void write_not_zeroes(struct client* client, uint64_t from, uint64_t len)
from += blockrun;
}
}
iommap_sync(iommap);
iommap_free(iommap);
}
}
@@ -234,6 +237,10 @@ int client_read_request( struct client * client , struct nbd_request *out_reques
debug( "Connection reset while"
" reading request" );
return 0;
case ETIMEDOUT:
debug( "Connection timed out while"
" reading request" );
return 0;
default:
/* FIXME: I've seen this happen, but I
* couldn't reproduce it so I'm leaving
@@ -445,22 +452,28 @@ void client_reply_to_read( struct client* client, struct nbd_request request )
void client_reply_to_write( struct client* client, struct nbd_request request )
{
debug("request write from=%"PRIu64", len=%"PRIu32", handle=0x%08X", request.from, request.len, request.handle);
if (client->serve->allocation_map_built) {
// TODO: Just write directly for now. Not (yet) convinced my changes later on work.
// if (client->serve->allocation_map_built) {
if (0) {
write_not_zeroes( client, request.from, request.len );
}
else {
debug("No allocation map, writing directly.");
/* If we get cut off partway through reading this data:
* */
struct iommap *iommap = iommap_alloc(client->fileno, request.from, request.len);
ERROR_IF_NEGATIVE(
readloop( client->socket,
client->mapped + request.from,
iommap->buf,
request.len),
"reading write data failed from=%ld, len=%d",
request.from,
request.len
);
iommap_sync(iommap);
iommap_free(iommap);
/* the allocation_map is shared between client threads, and may be
* being built. We need to reflect the write in it, as it may be in
* a position the builder has already gone over.
@@ -468,19 +481,6 @@ void client_reply_to_write( struct client* client, struct nbd_request request )
bitset_set_range(client->serve->allocation_map, request.from, request.len);
}
if (1) /* not sure whether this is necessary... */
{
/* multiple of 4K page size */
uint64_t from_rounded = request.from & (!0xfff);
uint64_t len_rounded = request.len + (request.from - from_rounded);
FATAL_IF_NEGATIVE(
msync( client->mapped + from_rounded,
len_rounded,
MS_SYNC | MS_INVALIDATE),
"msync failed %ld %ld", request.from, request.len
);
}
client_write_reply( client, &request, 0);
}
@@ -646,9 +646,6 @@ void client_cleanup(struct client* client,
debug("Closed client socket fd %d", client->socket);
client->socket = -1;
}
if (client->mapped) {
munmap(client->mapped, client->serve->size);
}
if (client->fileno) {
FATAL_IF_NEGATIVE( close(client->fileno),
"Error closing file %d",
@@ -664,25 +661,20 @@ void client_cleanup(struct client* client,
void* client_serve(void* client_uncast)
{
struct client* client = (struct client*) client_uncast;
void** a = NULL;
error_set_handler((cleanup_handler*) client_cleanup, client);
info("client: mmaping file");
FATAL_IF_NEGATIVE(
open_and_mmap(
client->serve->filename,
&client->fileno,
NULL,
(void**) &client->mapped
a
),
"Couldn't open/mmap file %s: %s", client->serve->filename, strerror( errno )
);
FATAL_IF_NEGATIVE(
madvise( client->mapped, client->serve->size, MADV_RANDOM ),
SHOW_ERRNO( "Failed to madvise() %s", client->serve->filename )
);
debug( "Opened client file fd %d", client->fileno);
debug("client: sending hello");
client_send_hello(client);

View File

@@ -29,7 +29,6 @@ struct client {
int socket;
int fileno;
char* mapped;
struct self_pipe * stop_signal;

View File

@@ -78,6 +78,8 @@ struct server * server_create (
NULLCHECK( out->close_signal );
NULLCHECK( out->acl_updated_signal );
log_context = s_file;
return out;
}
@@ -422,6 +424,9 @@ void accept_nbd_client(
int slot;
char s_client_address[64] = {0};
FATAL_IF_NEGATIVE( sock_set_keepalive_params( client_fd, CLIENT_KEEPALIVE_TIME, CLIENT_KEEPALIVE_INTVL, CLIENT_KEEPALIVE_PROBES),
"Error setting keepalive parameters on client socket fd %d", client_fd );
if ( !server_should_accept_client( params, client_address, s_client_address, 64 ) ) {
FATAL_IF_NEGATIVE( close( client_fd ),

View File

@@ -21,6 +21,9 @@ struct client_tbl_entry {
#define MAX_NBD_CLIENTS 16
/* TCP keepalive settings that accept_nbd_client() applies to each client
 * socket via sock_set_keepalive_params(). Times are in seconds, per tcp(7). */
/* idle time before the first probe (TCP_KEEPIDLE) */
#define CLIENT_KEEPALIVE_TIME 30
/* gap between successive probes (TCP_KEEPINTVL) */
#define CLIENT_KEEPALIVE_INTVL 10
/* number of probes sent (TCP_KEEPCNT) */
#define CLIENT_KEEPALIVE_PROBES 3
struct server {
/* The flexnbd wrapper this server is attached to */
struct flexnbd * flexnbd;