Compare commits
180 Commits
0.1.0
...
take-reque
Author | SHA1 | Date | |
---|---|---|---|
![]() |
cc69752394 | ||
![]() |
af2bee79fc | ||
![]() |
c37627a5b9 | ||
![]() |
ceb3328261 | ||
![]() |
61940bdfc5 | ||
![]() |
6d96d751d8 | ||
![]() |
fa75de0a8b | ||
![]() |
1cb11bfd38 | ||
![]() |
2702e73a26 | ||
![]() |
dbf50046a8 | ||
![]() |
d62b069ce4 | ||
![]() |
884a714744 | ||
![]() |
0c668f1776 | ||
![]() |
1d5b315f17 | ||
![]() |
24f1e62a73 | ||
![]() |
5c37cba39b | ||
![]() |
59f264184b | ||
![]() |
42d206cfb7 | ||
![]() |
ab3106202a | ||
![]() |
e04dead5ce | ||
![]() |
88bc5f0643 | ||
![]() |
e89c87e2b9 | ||
![]() |
9d2ac3f403 | ||
![]() |
67823bf85b | ||
![]() |
17d30b86ad | ||
![]() |
b97bcd6f51 | ||
![]() |
4d3c15a4d0 | ||
![]() |
83d6872a8d | ||
![]() |
ab8470aef3 | ||
![]() |
716df32fd6 | ||
![]() |
1a768d5e9c | ||
![]() |
72992c76ac | ||
![]() |
cace8123f4 | ||
![]() |
c3b241464a | ||
![]() |
4f956e4b9d | ||
![]() |
b4cb2d9240 | ||
![]() |
1efb7bada6 | ||
![]() |
6bc2a4c0b9 | ||
![]() |
59de76c50c | ||
![]() |
209da655b3 | ||
![]() |
52b45e6b40 | ||
![]() |
d279eb7570 | ||
![]() |
c07df76ede | ||
![]() |
e7e99b099c | ||
![]() |
b2edd0734a | ||
![]() |
e19d005636 | ||
![]() |
d1e6e835c4 | ||
![]() |
8fed794fe7 | ||
![]() |
e24efa9864 | ||
![]() |
3134d619ef | ||
![]() |
898f3f6c7e | ||
![]() |
5a1bc21088 | ||
![]() |
deb8f2c53b | ||
![]() |
1338d9e910 | ||
![]() |
47c05174b6 | ||
![]() |
191b3bc72c | ||
![]() |
770ca0d0e5 | ||
![]() |
6505588f25 | ||
![]() |
957707bcfc | ||
![]() |
3f01b77221 | ||
![]() |
0dbea7f8fe | ||
![]() |
091aacd16d | ||
![]() |
04b6637451 | ||
![]() |
7d2eda6cea | ||
![]() |
7e152ca4f2 | ||
![]() |
fe0125efbc | ||
![]() |
ebaaa6d671 | ||
![]() |
8cc8588744 | ||
![]() |
5da77ea39a | ||
![]() |
a744965c67 | ||
![]() |
d07659f694 | ||
![]() |
30562ed900 | ||
![]() |
93c0fa2e92 | ||
![]() |
8dc491fb89 | ||
![]() |
ea7cd64fc2 | ||
![]() |
35d3340708 | ||
![]() |
d47a44a204 | ||
![]() |
d6968d8242 | ||
![]() |
bf85e329a0 | ||
![]() |
edcaef532c | ||
![]() |
cb920e4e9d | ||
![]() |
91d85633b6 | ||
![]() |
7c516b85a6 | ||
![]() |
679fa6dbf8 | ||
![]() |
50708326ec | ||
![]() |
d907025d71 | ||
![]() |
e4d398a078 | ||
![]() |
8de0780125 | ||
![]() |
0fd16822ea | ||
![]() |
1e3c61b541 | ||
![]() |
a09e14b2d4 | ||
![]() |
a6710b6c32 | ||
![]() |
ed3995303f | ||
![]() |
f5de8fb12b | ||
![]() |
99a5f79a52 | ||
![]() |
356e1fd6a1 | ||
![]() |
67dcea207d | ||
![]() |
d3762162db | ||
![]() |
3571d3f82e | ||
![]() |
4cd7e764bb | ||
![]() |
4f535fbb02 | ||
![]() |
218c55fb63 | ||
![]() |
956a602475 | ||
![]() |
26a0a82f9d | ||
![]() |
76e0476113 | ||
![]() |
d9651a038c | ||
![]() |
fcd3d33498 | ||
![]() |
e3360a3a1b | ||
![]() |
1fefe1a669 | ||
![]() |
4ed8d49b2c | ||
![]() |
3af0e84f5f | ||
![]() |
ba14943b60 | ||
![]() |
4a709e73f8 | ||
![]() |
91a8946ddc | ||
![]() |
20f99b4554 | ||
![]() |
c363991cfd | ||
![]() |
c41eeff2fc | ||
![]() |
5960e4d10b | ||
![]() |
f0911b5c6c | ||
![]() |
b063f41ba8 | ||
![]() |
28c7e43e45 | ||
![]() |
9326b6b882 | ||
![]() |
f93476ebd3 | ||
![]() |
666b60ae1c | ||
![]() |
f48bf2b296 | ||
![]() |
705164ae3b | ||
![]() |
dbe7053bf3 | ||
![]() |
fa8023cf69 | ||
![]() |
aba802d415 | ||
![]() |
d146102c2c | ||
![]() |
5551373073 | ||
![]() |
77f333423b | ||
![]() |
ffa45879d7 | ||
![]() |
2fa1ce8e6b | ||
![]() |
6f540ce238 | ||
![]() |
f9a3447bc9 | ||
![]() |
7806ec11ee | ||
![]() |
1817c13acb | ||
![]() |
97c8d7a358 | ||
![]() |
8cf92af900 | ||
![]() |
5185be39c9 | ||
![]() |
374b4c616e | ||
![]() |
50ec8fb7cc | ||
![]() |
5fc9ad6fd8 | ||
![]() |
85c463c4bd | ||
![]() |
278a3151a8 | ||
![]() |
0ea66b1e04 | ||
![]() |
83e3d65be9 | ||
![]() |
4f31bd9340 | ||
![]() |
0baf93fd7b | ||
![]() |
175f19b3e7 | ||
![]() |
8d56316548 | ||
![]() |
27f2cc7083 | ||
![]() |
8084a41ad2 | ||
![]() |
5ca5858929 | ||
![]() |
afcc07a181 | ||
![]() |
dcead04cf6 | ||
![]() |
4f7f5f1745 | ||
![]() |
976e9ba07f | ||
![]() |
91d9531a60 | ||
![]() |
905d66af77 | ||
![]() |
eee7c9644c | ||
![]() |
ce5c51cdcf | ||
![]() |
c6c53c63ba | ||
![]() |
20bd58749e | ||
![]() |
866bf835e6 | ||
![]() |
53cbe14556 | ||
![]() |
cd3281f62d | ||
![]() |
1e5457fed0 | ||
![]() |
0753369b77 | ||
![]() |
9d9ae40953 | ||
![]() |
65d4f581b9 | ||
![]() |
77c71ccf09 | ||
![]() |
97a923afdf | ||
![]() |
335261869d | ||
![]() |
8cf9cae8c0 | ||
![]() |
6986c70888 | ||
![]() |
4b9ded0e1d | ||
![]() |
b177faacd6 | ||
![]() |
96e60a4a29 |
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
**/*.o
|
||||
**/*~
|
||||
flexnbd
|
||||
build/
|
||||
pkg/
|
||||
**/*.orig
|
||||
**/.*.swp
|
||||
cscope.out
|
||||
valgrind.out
|
27
.gitlab-ci.yml
Normal file
27
.gitlab-ci.yml
Normal file
@@ -0,0 +1,27 @@
|
||||
stages:
|
||||
- package
|
||||
- publish
|
||||
|
||||
package:jessie: &package
|
||||
stage: package
|
||||
image: $CI_REGISTRY/docker-images/layers:$DISTRO-deb
|
||||
variables:
|
||||
DISTRO: jessie
|
||||
script:
|
||||
- package
|
||||
artifacts:
|
||||
paths:
|
||||
- pkg/
|
||||
|
||||
package:stretch:
|
||||
<<: *package
|
||||
variables:
|
||||
DISTRO: stretch
|
||||
|
||||
publish:
|
||||
stage: publish
|
||||
tags:
|
||||
- shell
|
||||
script:
|
||||
- publish
|
||||
|
@@ -1,9 +0,0 @@
|
||||
.o$
|
||||
~$
|
||||
^flexnbd$
|
||||
^build/
|
||||
^pkg/
|
||||
\.orig$
|
||||
.*\.swp$
|
||||
cscope.out$
|
||||
valgrind.out$
|
116
Makefile
116
Makefile
@@ -1,10 +1,116 @@
|
||||
#!/usr/bin/make -f
|
||||
|
||||
all:
|
||||
rake build
|
||||
VPATH=src:tests/unit
|
||||
DESTDIR?=/
|
||||
PREFIX?=/usr/local/bin
|
||||
INSTALLDIR=$(DESTDIR)/$(PREFIX)
|
||||
|
||||
ifdef DEBUG
|
||||
CFLAGS_EXTRA=-g -DDEBUG
|
||||
LDFLAGS_EXTRA=-g
|
||||
else
|
||||
CFLAGS_EXTRA=-O2
|
||||
endif
|
||||
|
||||
all-debug:
|
||||
DEBUG=1 rake build
|
||||
CFLAGS_EXTRA += -fPIC --std=gnu99
|
||||
LDFLAGS_EXTRA += -Wl,--relax,--gc-sections -L$(LIB) -Wl,-rpath-link,$(LIB)
|
||||
|
||||
# The -Wunreachable-code warning is only implemented in clang, but it
|
||||
# doesn't break anything for gcc to see it.
|
||||
WARNINGS=-Wall \
|
||||
-Wextra \
|
||||
-Werror-implicit-function-declaration \
|
||||
-Wstrict-prototypes \
|
||||
-Wno-missing-field-initializers \
|
||||
-Wunreachable-code
|
||||
|
||||
CCFLAGS=-D_GNU_SOURCE=1 $(WARNINGS) $(CFLAGS_EXTRA) $(CFLAGS)
|
||||
LLDFLAGS=-lm -lrt -lev $(LDFLAGS_EXTRA) $(LDFLAGS)
|
||||
|
||||
CC?=gcc
|
||||
|
||||
LIBS=-lpthread
|
||||
INC=-I/usr/include/libev -Isrc/common -Isrc/server -Isrc/proxy
|
||||
COMPILE=$(CC) -MMD $(INC) -c $(CCFLAGS)
|
||||
LINK=$(CC) $(LLDFLAGS) -Isrc $(LIBS)
|
||||
|
||||
LIB=build/
|
||||
|
||||
COMMON_SRC := $(wildcard src/common/*.c)
|
||||
SERVER_SRC := $(wildcard src/server/*.c)
|
||||
PROXY_SRC := $(wildcard src/proxy/*.c)
|
||||
|
||||
COMMON_OBJ := $(COMMON_SRC:src/%.c=build/%.o)
|
||||
SERVER_OBJ := $(SERVER_SRC:src/%.c=build/%.o)
|
||||
PROXY_OBJ := $(PROXY_SRC:src/%.c=build/%.o)
|
||||
|
||||
SRCS := $(COMMON_SRC) $(SERVER_SRC) $(PROXY_SRC)
|
||||
OBJS := $(COMMON_OBJ) $(SERVER_OBJ) $(PROXY_OBJ)
|
||||
|
||||
|
||||
all: build doc
|
||||
|
||||
build: server proxy
|
||||
|
||||
build/%.o: %.c
|
||||
mkdir -p $(dir $@)
|
||||
$(COMPILE) $< -o $@
|
||||
|
||||
objs: $(OBJS)
|
||||
|
||||
build/flexnbd: $(COMMON_OBJ) $(SERVER_OBJ) build/main.o
|
||||
$(LINK) $^ -o $@
|
||||
|
||||
build/flexnbd-proxy: $(COMMON_OBJ) $(PROXY_OBJ) build/proxy-main.o
|
||||
$(LINK) $^ -o $@
|
||||
|
||||
server: build/flexnbd
|
||||
|
||||
proxy: build/flexnbd-proxy
|
||||
|
||||
CHECK_SRC := $(wildcard tests/unit/*.c)
|
||||
CHECK_OBJ := $(CHECK_SRC:tests/unit/%.c=build/%.o)
|
||||
# Why can't we reuse the build/%.o rule above? Not sure.
|
||||
|
||||
CHECK_BINS := $(CHECK_SRC:tests/unit/%.c=build/%)
|
||||
|
||||
build/check_%: build/check_%.o
|
||||
$(LINK) $^ -o $@ $(COMMON_OBJ) $(SERVER_OBJ) -lcheck -lsubunit
|
||||
|
||||
check_objs: $(CHECK_OBJ)
|
||||
|
||||
check_bins: $(CHECK_BINS)
|
||||
|
||||
check: $(OBJS) $(CHECK_BINS)
|
||||
r=true ; for bin in $(CHECK_BINS); do $$bin || r=false; done ; $$r
|
||||
|
||||
acceptance: build
|
||||
cd tests/acceptance && RUBYOPT='-I.' ruby nbd_scenarios -v
|
||||
|
||||
test: check acceptance
|
||||
|
||||
build/flexnbd.1: README.txt
|
||||
txt2man -t flexnbd -s 1 $< > $@
|
||||
|
||||
build/flexnbd-proxy.1: README.proxy.txt
|
||||
txt2man -t flexnbd-proxy -s 1 $< > $@
|
||||
|
||||
# If we don't pipe to file, gzip clobbers the original, causing make
|
||||
# to rebuild each time
|
||||
%.1.gz: %.1
|
||||
gzip -c -f $< > $@
|
||||
|
||||
doc: build/flexnbd.1.gz build/flexnbd-proxy.1.gz
|
||||
|
||||
install:
|
||||
mkdir -p $(INSTALLDIR)
|
||||
cp build/flexnbd build/flexnbd-proxy $(INSTALLDIR)
|
||||
|
||||
clean:
|
||||
rake clean
|
||||
rm -rf build/*
|
||||
|
||||
|
||||
.PHONY: clean objs check_objs all server proxy check_bins check doc build test acceptance
|
||||
|
||||
# Include extra dependencies at the end, NOT before 'all'
|
||||
-include $(wildcard build/*.d)
|
||||
|
148
README.proxy.txt
148
README.proxy.txt
@@ -1,19 +1,14 @@
|
||||
FLEXNBD-PROXY(1)
|
||||
================
|
||||
:doctype: manpage
|
||||
|
||||
NAME
|
||||
----
|
||||
|
||||
flexnbd-proxy - A simple NBD proxy
|
||||
flexnbd-proxy - A simple NBD proxy
|
||||
|
||||
SYNOPSIS
|
||||
--------
|
||||
|
||||
*flexnbd-proxy* ['OPTIONS']
|
||||
flexnbd-proxy --addr ADDR [--port PORT] --conn-addr ADDR
|
||||
--conn-port PORT [--bind ADDR] [--cache[=CACHE_BYTES]]
|
||||
[--help] [--verbose] [--quiet]
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
|
||||
flexnbd-proxy is a simple NBD proxy server that implements resilient
|
||||
connection logic for the client. It connects to an upstream NBD server
|
||||
@@ -25,10 +20,6 @@ of view of the client) reconnects and retransmits the request, before
|
||||
returning the response to the client.
|
||||
|
||||
USAGE
|
||||
-----
|
||||
|
||||
$ flexnbd-proxy --addr <ADDR> [ --port <PORT> ]
|
||||
--conn-addr <ADDR> --conn-port <PORT> [--bind <ADDR>] [option]*
|
||||
|
||||
Proxy requests from an NBD client to an NBD server, resiliently. Only one
|
||||
client can be connected at a time, and ACLs cannot be applied to the client, as they
|
||||
@@ -57,71 +48,73 @@ Only one request may be in-flight at a time under the current architecture; that
|
||||
doesn't seem to slow things down much relative to alternative options, but may
|
||||
be changed in the future if it becomes an issue.
|
||||
|
||||
Options
|
||||
~~~~~~~
|
||||
OPTIONS
|
||||
|
||||
*--addr, -l ADDR*:
|
||||
--addr, -l ADDR
|
||||
The address to listen on. If this begins with a '/', it is assumed to be
|
||||
a UNIX domain socket to create. Otherwise, it should be an IPv4 or IPv6
|
||||
address.
|
||||
*--port, -p PORT*:
|
||||
|
||||
--port, -p PORT
|
||||
The port to listen on, if --addr is not a UNIX socket.
|
||||
|
||||
*--conn-addr, -C ADDR*:
|
||||
--conn-addr, -C ADDR
|
||||
The address of the NBD server to connect to. Required.
|
||||
|
||||
*--conn-port, -P PORT*:
|
||||
--conn-port, -P PORT
|
||||
The port of the NBD server to connect to. Required.
|
||||
|
||||
*--help, -h* :
|
||||
--cache, -c=CACHE_BYTES
|
||||
If given, the size in bytes of read cache to use. CACHE_BYTES
|
||||
defaults to 4096.
|
||||
|
||||
--help, -h
|
||||
Show command or global help.
|
||||
|
||||
*--verbose, -v* :
|
||||
--verbose, -v
|
||||
Output all available log information to STDERR.
|
||||
|
||||
*--quiet, -q* :
|
||||
--quiet, -q
|
||||
Output as little log information as possible to STDERR.
|
||||
|
||||
|
||||
LOGGING
|
||||
-------
|
||||
Log output is sent to STDERR. If --quiet is set, no output will be seen
|
||||
unless the program terminates abnormally. If neither --quiet nor
|
||||
|
||||
Log output is sent to STDERR. If --quiet is set, no output will be
|
||||
seen unless the program terminates abnormally. If neither --quiet nor
|
||||
--verbose are set, no output will be seen unless something goes wrong
|
||||
with a specific request. If --verbose is given, every available log
|
||||
message will be seen (which, for a debug build, is many). It is not an
|
||||
error to set both --verbose and --quiet. The last one wins.
|
||||
with a specific request. If --verbose is given, every available log
|
||||
message will be seen (which, for a debug build, is many). It is not an
|
||||
error to set both --verbose and --quiet. The last one wins.
|
||||
|
||||
The log line format is:
|
||||
|
||||
<TIMESTAMP>:<LEVEL>:<PID> <THREAD> <SOURCEFILE>:<SOURCELINE>: <MSG>
|
||||
<TIMESTAMP>:<LEVEL>:<PID> <THREAD> <SOURCEFILE>:<SOURCELINE>: <MSG>
|
||||
|
||||
*TIMESTAMP*:
|
||||
<TIMESTAMP>
|
||||
Time the log entry was made. This is expressed in terms of monotonic ms.
|
||||
|
||||
*LEVEL*:
|
||||
<LEVEL>
|
||||
This will be one of 'D', 'I', 'W', 'E', 'F' in increasing order of
|
||||
severity. If flexnbd is started with the --quiet flag, only 'F' will be
|
||||
seen. If it is started with the --verbose flag, any from 'I' upwards
|
||||
will be seen. Only if you have a debug build and start it with
|
||||
--verbose will you see 'D' entries.
|
||||
severity. If flexnbd is started with the --quiet flag, only 'F' will
|
||||
be seen. If it is started with the --verbose flag, any from 'I'
|
||||
upwards will be seen. Only if you have a debug build and start it
|
||||
with --verbose will you see 'D' entries.
|
||||
|
||||
*PID*:
|
||||
<PID>
|
||||
This is the process ID.
|
||||
|
||||
*THREAD*:
|
||||
flexnbd-proxy is currently single-threaded, so this should be the same
|
||||
for all lines. That may not be the case in the future.
|
||||
<THREAD>
|
||||
flexnbd-proxy is currently single-threaded, so this should be the
|
||||
same for all lines. That may not be the case in the future.
|
||||
|
||||
*SOURCEFILE:SOURCELINE*:
|
||||
<SOURCEFILE:SOURCELINE>
|
||||
Identifies where in the source code this log line can be found.
|
||||
|
||||
*MSG*:
|
||||
<MSG>
|
||||
A short message describing what's happening, how it's being done, or
|
||||
if you're very lucky *why* it's going on.
|
||||
if you're very lucky why it's going on.
|
||||
|
||||
Proxying
|
||||
~~~~~~~~
|
||||
EXAMPLES
|
||||
|
||||
The main point of the proxy mode is to allow clients that would otherwise break
|
||||
when the NBD server goes away (during a migration, for instance) to see a
|
||||
@@ -154,31 +147,60 @@ The proxy notices and reconnects, fulfiling any request it has in its buffer.
|
||||
The data in myfile has been moved between physical servers without the nbd
|
||||
client process having to be disturbed at all.
|
||||
|
||||
BUGS
|
||||
----
|
||||
READ CACHE
|
||||
|
||||
Should be reported to nick@bytemark.co.uk.
|
||||
If the --cache option is given at the command line, either without an
|
||||
argument or with an argument greater than 0, flexnbd-proxy will use a
|
||||
read-ahead cache. The cache as currently implemented doubles each read
|
||||
request size, up to a maximum of 2xCACHE_BYTES, and retains the latter
|
||||
half in a buffer. If the next read request from the client exactly
|
||||
matches the region held in the buffer, flexnbd-proxy responds from the
|
||||
cache without making a request to the server.
|
||||
|
||||
This pattern is designed to match sequential reads, such as those
|
||||
performed by a booting virtual machine.
|
||||
|
||||
Note: If specifying a cache size, you must use this form:
|
||||
|
||||
nbd-client$ flexnbd-proxy --cache=XXXX
|
||||
|
||||
That is, the '=' is required. This is a limitation of getopt-long.
|
||||
|
||||
If no cache size is given, a size of 4096 bytes is assumed. Caching can
|
||||
be explicitly disabled by setting a size of 0.
|
||||
|
||||
BUGS
|
||||
|
||||
Should be reported via GitHub.
|
||||
|
||||
* https://github.com/BytemarkHosting/flexnbd-c/issues
|
||||
|
||||
Current issues include:
|
||||
|
||||
* Only old-style NBD negotiation is supported
|
||||
* Only one request may be in-flight at a time
|
||||
* All I/O is blocking, and signals terminate the process immediately
|
||||
* UNIX socket support is limited to the listen address
|
||||
* FLUSH and TRIM commands, and the FUA flag, are not supported
|
||||
* DISCONNECT requests do not get passed through to the NBD server
|
||||
* No active timeout-retry of requests - we trust the kernel's idea of failure
|
||||
* only old-style NBD negotiation is supported;
|
||||
* only one request may be in-flight at a time;
|
||||
* all I/O is blocking, and signals terminate the process immediately;
|
||||
* UNIX socket support is limited to the listen address;
|
||||
* FLUSH and TRIM commands, and the FUA flag, are not supported;
|
||||
* DISCONNECT requests do not get passed through to the NBD server;
|
||||
* no active timeout-retry of requests - we trust the kernel's idea of
|
||||
failure.
|
||||
|
||||
AUTHOR
|
||||
------
|
||||
Written by Alex Young <alex@bytemark.co.uk>.
|
||||
|
||||
Originally written by Alex Young <alex@blackkettle.org>.
|
||||
Original concept and core code by Matthew Bloch <matthew@bytemark.co.uk>.
|
||||
Proxy mode written by Nick Thomas <nick@bytemark.co.uk>
|
||||
Proxy mode written by Nick Thomas <me@ur.gs>.
|
||||
|
||||
COPYING
|
||||
-------
|
||||
The full commit history is available on GitHub.
|
||||
|
||||
Copyright (c) 2012 Bytemark Hosting Ltd. Free use of this software is
|
||||
granted under the terms of the GNU General Public License version 3 or
|
||||
later.
|
||||
SEE ALSO
|
||||
|
||||
flexnbd(1), nbd-client(8), xnbd-server(8), xnbd-client(8)
|
||||
|
||||
COPYRIGHT
|
||||
|
||||
Copyright (c) 2012-2016 Bytemark Hosting Ltd. Free use of this
|
||||
software is granted under the terms of the GNU General Public License
|
||||
version 3 or later.
|
||||
|
||||
|
373
README.txt
373
README.txt
@@ -1,17 +1,36 @@
|
||||
FLEXNBD(1)
|
||||
==========
|
||||
:doctype: manpage
|
||||
|
||||
NAME
|
||||
----
|
||||
|
||||
flexnbd - A fast NBD server
|
||||
|
||||
SYNOPSIS
|
||||
--------
|
||||
*flexnbd* 'COMMAND' ['OPTIONS']
|
||||
|
||||
flexnbd MODE [ ARGS ]
|
||||
|
||||
flexnbd serve --addr ADDR --port PORT --file FILE [--sock SOCK]
|
||||
[--default-deny] [--killswitch] [global_option]* [acl_entry]*
|
||||
|
||||
flexnbd listen --addr ADDR --port PORT --file FILE [--sock SOCK]
|
||||
[--default-deny] [global_option]* [acl_entry]*
|
||||
|
||||
flexnbd mirror --addr ADDR --port PORT --sock SOCK [--unlink]
|
||||
[--bind BIND_ADDR] [global_option]*
|
||||
|
||||
flexnbd acl --sock SOCK [acl_entry]+ [global_option]*
|
||||
|
||||
flexnbd break --sock SOCK [global_option]*
|
||||
|
||||
flexnbd status --sock SOCK [global_option]*
|
||||
|
||||
flexnbd read --addr ADDR --port PORT --from OFFSET --size SIZE
|
||||
[--bind BIND_ADDR] [global_option]*
|
||||
|
||||
flexnbd write --addr ADDR --port PORT --from OFFSET --size SIZE
|
||||
[--bind BIND_ADDR] [global_option]*
|
||||
|
||||
flexnbd help [mode] [global_option]*
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
|
||||
Flexnbd is a fast NBD server which supports live migration. Live
|
||||
migration is performed by writing the data to a new server. A failed
|
||||
migration will be invisible to any connected clients.
|
||||
@@ -19,298 +38,290 @@ migration will be invisible to any connected clients.
|
||||
Flexnbd tries quite hard to preserve sparsity of files it is serving,
|
||||
even across migrations.
|
||||
|
||||
COMMANDS
|
||||
--------
|
||||
SERVE MODE
|
||||
|
||||
Serve a file.
|
||||
|
||||
serve
|
||||
~~~~~
|
||||
$ flexnbd serve --addr <ADDR> --port <PORT> --file <FILE>
|
||||
[--sock <SOCK>] [--default-deny] [global option]* [acl entry]*
|
||||
[--sock <SOCK>] [--default-deny] [-k] [global_option]*
|
||||
[acl_entry]*
|
||||
|
||||
Serve a file. If any ACL entries are given (which should be IP
|
||||
If any ACL entries are given (which should be IP
|
||||
addresses), only those clients listed will be permitted to connect.
|
||||
|
||||
flexnbd will continue to serve until a SIGINT, SIGQUIT, or a successful
|
||||
migration.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
OPTIONS
|
||||
|
||||
*--addr, -l ADDR*:
|
||||
--addr, -l ADDR
|
||||
The address to listen on. Required.
|
||||
|
||||
*--port, -p PORT*:
|
||||
--port, -p PORT
|
||||
The port to listen on. Required.
|
||||
|
||||
*--file, -f FILE*:
|
||||
--file, -f FILE
|
||||
The file to serve. Must already exist. Required.
|
||||
|
||||
*--sock, -s SOCK*:
|
||||
Path to a control socket to open. You will need this if you want to
|
||||
--sock, -s SOCK
|
||||
Path to a control socket to open. You will need this if you want to
|
||||
migrate, get the current status, or manipulate the access control
|
||||
list.
|
||||
|
||||
*--default-deny, -d*:
|
||||
How to interpret an empty ACL. If --default-deny is given, an
|
||||
empty ACL will let no clients connect. If it is not given, an
|
||||
--default-deny, -d
|
||||
How to interpret an empty ACL. If --default-deny is given, an
|
||||
empty ACL will let no clients connect. If it is not given, an
|
||||
empty ACL will let any client connect.
|
||||
|
||||
listen
|
||||
~~~~~~
|
||||
--killswitch, -k
|
||||
If set, we implement a 2-minute timeout on NBD requests and
|
||||
responses. If a request takes longer than that to complete,
|
||||
the client is disconnected. This is useful to keep broken
|
||||
clients from breaking migrations, among other things.
|
||||
|
||||
$ flexnbd listen --addr <ADDR> --port <PORT> --file <FILE>
|
||||
[--sock <SOCK>] [--default-deny] [global option]* [acl entry]*
|
||||
LISTEN MODE
|
||||
|
||||
Listen for an inbound migration, and quit with a status of 0 on
|
||||
completion.
|
||||
|
||||
$ flexnbd listen --addr ADDR --port PORT --file FILE
|
||||
[--sock SOCK] [--default-deny] [global_option]*
|
||||
[acl_entry]*
|
||||
|
||||
flexnbd will wait for a successful migration, and then quit. The file
|
||||
to write the inbound migration data to must already exist before you
|
||||
run 'flexnbd listen'.
|
||||
|
||||
Only one sender may connect to send data, and if the sender
|
||||
disconnects part-way through the migration, the destination will
|
||||
expect it to reconnect and retry the whole migration. It isn't safe
|
||||
expect it to reconnect and retry the whole migration. It isn't safe
|
||||
to assume that a partial migration can be resumed because the
|
||||
destination has no knowledge of whether a client has made a write to
|
||||
the source in the interim.
|
||||
|
||||
If the migration fails for a reason which the `flexnbd listen` process
|
||||
If the migration fails for a reason which the 'flexnbd listen' process
|
||||
can't fix (say, a failed local write), it will exit with an error
|
||||
status. In this case, the sender will continually retry the migration
|
||||
until it succeeds, and you will need to restart the `flexnbd listen`
|
||||
status. In this case, the sender will continually retry the migration
|
||||
until it succeeds, and you will need to restart the 'flexnbd listen'
|
||||
process to allow that to happen.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
As for 'serve'.
|
||||
OPTIONS
|
||||
|
||||
mirror
|
||||
~~~~~~
|
||||
As for serve.
|
||||
|
||||
$ flexnbd mirror --addr <ADDR> --port <PORT> --sock SOCK
|
||||
[--unlink] [--bind <BIND-ADDR>] [global option]*
|
||||
MIRROR MODE
|
||||
|
||||
Start a migration from the server with control socket SOCK to the server
|
||||
listening at ADDR:PORT.
|
||||
|
||||
$ flexnbd mirror --addr ADDR --port PORT --sock SOCK [--unlink]
|
||||
[--bind BIND_ADDR] [global_option]*
|
||||
|
||||
Migration can be a slow process. Rather than block the 'flexnbd mirror'
|
||||
process until it completes, it will exit with a message of "Migration
|
||||
started" once it has confirmation that the local server was able to
|
||||
connect to ADDR:PORT and got an NBD header back. To check on the
|
||||
connect to ADDR:PORT and got an NBD header back. To check on the
|
||||
progress of a running migration, use 'flexnbd status'.
|
||||
|
||||
If the destination unexpectedly disconnects part-way through the
|
||||
migration, the source will attempt to reconnect and start the migration
|
||||
again. It is not safe to resume the migration from where it left off
|
||||
again. It is not safe to resume the migration from where it left off
|
||||
because the source can't see that the backing store behind the
|
||||
destination is intact, or even on the same machine.
|
||||
|
||||
If the `--unlink` option is given, the local file will be deleted
|
||||
immediately before the mirror connection is terminated. This allows
|
||||
If the --unlink option is given, the local file will be deleted
|
||||
immediately before the mirror connection is terminated. This allows
|
||||
an otherwise-ambiguous situation to be resolved: if you don't unlink
|
||||
the file and the flexnbd process at either end is terminated, it's not
|
||||
possible to tell which copy of the data is canonical. Since the
|
||||
possible to tell which copy of the data is canonical. Since the
|
||||
unlink happens as soon as the sender knows that it has transmitted all
|
||||
the data, there can be no ambiguity.
|
||||
|
||||
Note: files smaller than 4096 bytes cannot be mirrored.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
OPTIONS
|
||||
|
||||
*--addr, -l ADDR*:
|
||||
The address of the remote server to migrate to. Required.
|
||||
--addr, -l ADDR
|
||||
The address of the remote server to migrate to. Required.
|
||||
|
||||
*--port, -p PORT*:
|
||||
The port of the remote server to migrate to. Required.
|
||||
--port, -p PORT
|
||||
The port of the remote server to migrate to. Required.
|
||||
|
||||
*--sock, -s SOCK*:
|
||||
The control socket of the local server to migrate from. Required.
|
||||
--sock, -s SOCK
|
||||
The control socket of the local server to migrate from. Required.
|
||||
|
||||
*--unlink, -u*:
|
||||
Unlink the served file from the local filesystem after successfully
|
||||
mirroring.
|
||||
--unlink, -u
|
||||
Unlink the served file from the local filesystem after
|
||||
successfully mirroring.
|
||||
|
||||
*--bind, -b BIND-ADDR*:
|
||||
The local address to bind to. You may need this if the remote server
|
||||
is using an access control list.
|
||||
--bind, -b BIND_ADDR
|
||||
The local address to bind to. You may need this if the remote
|
||||
server is using an access control list.
|
||||
|
||||
break
|
||||
~~~~~
|
||||
|
||||
$ flexnbd mirror --sock SOCK [global option]*
|
||||
BREAK MODE
|
||||
|
||||
Stop a running migration.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
$ flexnbd break --sock SOCK [global_option]*
|
||||
|
||||
*--sock, -s SOCK*:
|
||||
The control socket of the local server whose emigration to stop.
|
||||
Required.
|
||||
OPTIONS
|
||||
|
||||
--sock, -s SOCK
|
||||
The control socket of the local server whose migration to stop.
|
||||
Required.
|
||||
|
||||
acl
|
||||
~~~
|
||||
|
||||
$ flexnbd acl --sock <SOCK> [acl entry]+ [global option]*
|
||||
ACL MODE
|
||||
|
||||
Set the access control list of the server with the control socket SOCK
|
||||
to the given access control list entries.
|
||||
|
||||
$ flexnbd acl --sock SOCK [acl_entry]+ [global_option]*
|
||||
|
||||
ACL entries are given as IP addresses.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
OPTIONS
|
||||
|
||||
*--sock, -s SOCK*:
|
||||
The control socket of the server whose ACL to replace.
|
||||
--sock, -s SOCK
|
||||
The control socket of the server whose ACL to replace. Required.
|
||||
|
||||
status
|
||||
~~~~~~
|
||||
|
||||
$ flexnbd status --sock <SOCK> [global option]*
|
||||
STATUS MODE
|
||||
|
||||
Get the current status of the server with control socket SOCK.
|
||||
|
||||
The status will be printed to STDOUT. It is a space-separated list of
|
||||
key=value pairs. The space character will never appear in a key or
|
||||
value. Currently reported values are:
|
||||
$ flexnbd status --sock SOCK [global_option]*
|
||||
|
||||
*pid*:
|
||||
The status will be printed to STDOUT. It is a space-separated list of
|
||||
key=value pairs. The space character will never appear in a key or
|
||||
value. Currently reported values are:
|
||||
|
||||
pid
|
||||
The process id of the server listening on SOCK.
|
||||
|
||||
*is_mirroring*:
|
||||
is_mirroring
|
||||
'true' if this server is sending migration data, 'false' otherwise.
|
||||
|
||||
*has_control*:
|
||||
has_control
|
||||
'false' if this server was started in 'listen' mode. 'true' otherwise.
|
||||
|
||||
read
|
||||
~~~~
|
||||
OPTIONS
|
||||
|
||||
$ flexnbd read --addr <ADDR> --port <PORT> --from <OFFSET>
|
||||
--size <SIZE> [--bind BIND-ADDR] [global option]*
|
||||
--sock, -s SOCK
|
||||
The control socket of the server of interest. Required.
|
||||
|
||||
READ MODE
|
||||
|
||||
Connect to the server at ADDR:PORT, and read SIZE bytes starting at
|
||||
OFFSET in a single NBD query. The returned data will be echoed to
|
||||
STDOUT. In case of a remote ACL, set the local source address to
|
||||
BIND-ADDR.
|
||||
OFFSET in a single NBD query.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
$ flexnbd read --addr ADDR --port PORT --from OFFSET --size SIZE
|
||||
[--bind BIND_ADDR] [global_option]*
|
||||
|
||||
*--addr, -l ADDR*:
|
||||
The address of the remote server. Required.
|
||||
The returned data will be echoed to STDOUT. In case of a remote ACL,
|
||||
set the local source address to BIND_ADDR.
|
||||
|
||||
*--port, -p PORT*:
|
||||
The port of the remote server. Required.
|
||||
OPTIONS
|
||||
|
||||
*--from, -F OFFSET*:
|
||||
The byte offset to start reading from. Required. Maximum 2^62.
|
||||
--addr, -l ADDR
|
||||
The address of the remote server. Required.
|
||||
|
||||
*--size, -S SIZE*:
|
||||
The number of bytes to read. Required. Maximum 2^30.
|
||||
--port, -p PORT
|
||||
The port of the remote server. Required.
|
||||
|
||||
*--bind, -b BIND-ADDR*:
|
||||
The local address to bind to. You may need this if the remote server
|
||||
is using an access control list.
|
||||
--from, -F OFFSET
|
||||
The byte offset to start reading from. Required. Maximum 2^62.
|
||||
|
||||
write
|
||||
~~~~~
|
||||
--size, -S SIZE
|
||||
The number of bytes to read. Required. Maximum 2^30.
|
||||
|
||||
$ cat ... | flexnbd write --addr <ADDR> --port <PORT> --from <OFFSET>
|
||||
--size <SIZE> [--bind BIND-ADDR] [global option]*
|
||||
--bind, -b BIND_ADDR
|
||||
The local address to bind to. You may need this if the remote
|
||||
server is using an access control list.
|
||||
|
||||
WRITE MODE
|
||||
|
||||
Connect to the server at ADDR:PORT, and write SIZE bytes from STDIN
|
||||
starting at OFFSET in a single NBD query. In case of a remote ACL, set
|
||||
the local source address to BIND-ADDR.
|
||||
starting at OFFSET in a single NBD query.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
$ cat ... | flexnbd write --addr ADDR --port PORT --from OFFSET
|
||||
--size SIZE [--bind BIND_ADDR] [global_option]*
|
||||
|
||||
*--addr, -l ADDR*:
|
||||
The address of the remote server. Required.
|
||||
In case of a remote ACL, set the local source address to BIND_ADDR.
|
||||
|
||||
*--port, -p PORT*:
|
||||
The port of the remote server. Required.
|
||||
OPTIONS
|
||||
|
||||
*--from, -F OFFSET*:
|
||||
The byte offset to start writing from. Required. Maximum 2^62.
|
||||
--addr, -l ADDR
|
||||
The address of the remote server. Required.
|
||||
|
||||
*--size, -S SIZE*:
|
||||
The number of bytes to write. Required. Maximum 2^30.
|
||||
--port, -p PORT
|
||||
The port of the remote server. Required.
|
||||
|
||||
*--bind, -b BIND-ADDR*:
|
||||
The local address to bind to. You may need this if the remote server
|
||||
is using an access control list.
|
||||
--from, -F OFFSET
|
||||
The byte offset to start writing from. Required. Maximum 2^62.
|
||||
|
||||
help
|
||||
~~~~
|
||||
--size, -S SIZE
|
||||
The number of bytes to write. Required. Maximum 2^30.
|
||||
|
||||
$ flexnbd help [command] [global option]*
|
||||
--bind, -b BIND_ADDR
|
||||
The local address to bind to. You may need this if the remote
|
||||
server is using an access control list.
|
||||
|
||||
Without 'command', show the list of available commands. With 'command',
|
||||
show help for that command.
|
||||
HELP MODE
|
||||
|
||||
$ flexnbd help [mode] [global_option]*
|
||||
|
||||
Without mode, show the list of available modes. With mode, show help for that mode.
|
||||
|
||||
GLOBAL OPTIONS
|
||||
--------------
|
||||
|
||||
*--help, -h* :
|
||||
Show command or global help.
|
||||
--help, -h Show mode or global help.
|
||||
|
||||
*--verbose, -v* :
|
||||
Output all available log information to STDERR.
|
||||
|
||||
*--quiet, -q* :
|
||||
Output as little log information as possible to STDERR.
|
||||
--verbose, -v Output all available log information to STDERR.
|
||||
|
||||
--quiet, -q Output as little log information as possible to STDERR.
|
||||
|
||||
LOGGING
|
||||
-------
|
||||
Log output is sent to STDERR. If --quiet is set, no output will be seen
|
||||
unless the program terminates abnormally. If neither --quiet nor
|
||||
|
||||
Log output is sent to STDERR. If --quiet is set, no output will be
|
||||
seen unless the program terminates abnormally. If neither --quiet nor
|
||||
--verbose are set, no output will be seen unless something goes wrong
|
||||
with a specific request. If --verbose is given, every available log
|
||||
message will be seen (which, for a debug build, is many). It is not an
|
||||
error to set both --verbose and --quiet. The last one wins.
|
||||
with a specific request. If --verbose is given, every available log
|
||||
message will be seen (which, for a debug build, is many). It is not an
|
||||
error to set both --verbose and --quiet. The last one wins.
|
||||
|
||||
The log line format is:
|
||||
|
||||
<TIMESTAMP>:<LEVEL>:<PID> <THREAD> <SOURCEFILE>:<SOURCELINE>: <MSG>
|
||||
<TIMESTAMP>:<LEVEL>:<PID> <THREAD> <SOURCEFILE:SOURCELINE>: <MSG>
|
||||
|
||||
*TIMESTAMP*:
|
||||
Time the log entry was made. This is expressed in terms of monotonic ms.
|
||||
<TIMESTAMP>
|
||||
Time the log entry was made. This is expressed in terms of monotonic
|
||||
ms.
|
||||
|
||||
*LEVEL*:
|
||||
<LEVEL>
|
||||
This will be one of 'D', 'I', 'W', 'E', 'F' in increasing order of
|
||||
severity. If flexnbd is started with the --quiet flag, only 'F' will be
|
||||
seen. If it is started with the --verbose flag, any from 'I' upwards
|
||||
will be seen. Only if you have a debug build and start it with
|
||||
--verbose will you see 'D' entries.
|
||||
severity. If flexnbd is started with the --quiet flag, only 'F'
|
||||
will be seen. If it is started with the --verbose flag, any from 'I'
|
||||
upwards will be seen. Only if you have a debug build and start it
|
||||
with --verbose will you see 'D' entries.
|
||||
|
||||
*PID*:
|
||||
<PID>
|
||||
This is the process ID.
|
||||
|
||||
*THREAD*:
|
||||
There are several pthreads per flexnbd process: a main thread, a serve
|
||||
thread, a thread per client, and possibly a pair of mirror threads and a
|
||||
control thread. This field identifies which thread was responsible for
|
||||
the log line.
|
||||
<THREAD>
|
||||
There are several pthreads per flexnbd process: a main thread, a
|
||||
serve thread, a thread per client, and possibly a pair of mirror
|
||||
threads and a control thread. This field identifies which thread was
|
||||
responsible for the log line.
|
||||
|
||||
*SOURCEFILE:SOURCELINE*:
|
||||
<SOURCEFILE:SOURCELINE>
|
||||
Identifies where in the source code this log line can be found.
|
||||
|
||||
*MSG*:
|
||||
<MSG>
|
||||
A short message describing what's happening, how it's being done, or
|
||||
if you're very lucky *why* it's going on.
|
||||
if you're very lucky why it's going on.
|
||||
|
||||
EXAMPLES
|
||||
--------
|
||||
|
||||
Serving a file
|
||||
~~~~~~~~~~~~~~
|
||||
SERVING A FILE
|
||||
|
||||
The simplest case is serving a file on the default nbd port:
|
||||
|
||||
@@ -320,8 +331,7 @@ The simplest case is serving a file on the default nbd port:
|
||||
root:x:
|
||||
$
|
||||
|
||||
Reading server status
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
READING SERVER STATUS
|
||||
|
||||
In order to read a server's status, we need it to open a control socket.
|
||||
|
||||
@@ -329,13 +339,12 @@ In order to read a server's status, we need it to open a control socket.
|
||||
--sock /tmp/flexnbd.sock
|
||||
$ flexnbd status --sock /tmp/flexnbd.sock
|
||||
pid=9635 is_mirroring=false has_control=true
|
||||
|
||||
|
||||
$
|
||||
|
||||
Note that the status output is newline-terminated.
|
||||
|
||||
Migrating
|
||||
~~~~~~~~~
|
||||
MIGRATING
|
||||
|
||||
To migrate, we need to provide a destination file of the right size.
|
||||
|
||||
@@ -361,8 +370,8 @@ With this knowledge in hand, we can start the migration:
|
||||
$ flexnbd mirror --addr 127.0.0.1 --port 4779 \
|
||||
--sock /tmp/flex-source.sock
|
||||
Migration started
|
||||
[1] + 9648 done build/flexnbd serve --addr 0.0.0.0 --port 4778
|
||||
[2] + 9651 done build/flexnbd listen --addr 0.0.0.0 --port 4779
|
||||
[1] + 9648 done flexnbd serve --addr 0.0.0.0 --port 4778
|
||||
[2] + 9651 done flexnbd listen --addr 0.0.0.0 --port 4779
|
||||
$
|
||||
|
||||
Note that because the file is so small in this case, we see the source
|
||||
@@ -370,21 +379,25 @@ server quit soon after we start the migration, and the destination
|
||||
exited at roughly the same time.
|
||||
|
||||
BUGS
|
||||
----
|
||||
|
||||
Should be reported to alex@bytemark.co.uk.
|
||||
Should be reported on GitHub at
|
||||
|
||||
* https://github.com/BytemarkHosting/flexnbd-c/issues
|
||||
|
||||
AUTHOR
|
||||
------
|
||||
|
||||
Written by Alex Young <alex@bytemark.co.uk>.
|
||||
Originally written by Alex Young <alex@blackkettle.org>.
|
||||
Original concept and core code by Matthew Bloch <matthew@bytemark.co.uk>.
|
||||
Some additions by Nick Thomas <nick@bytemark.co.uk>
|
||||
Proxy mode written by Nick Thomas <me@ur.gs>.
|
||||
|
||||
COPYING
|
||||
-------
|
||||
The full commit history is available on GitHub.
|
||||
|
||||
Copyright (c) 2012 Bytemark Hosting Ltd. Free use of this software is
|
||||
granted under the terms of the GNU General Public License version 3 or
|
||||
later.
|
||||
SEE ALSO
|
||||
|
||||
flexnbd-proxy(1), nbd-client(8), xnbd-server(8), xnbd-client(8)
|
||||
|
||||
COPYRIGHT
|
||||
|
||||
Copyright (c) 2012-2016 Bytemark Hosting Ltd. Free use of this
|
||||
software is granted under the terms of the GNU General Public License
|
||||
version 3 or later.
|
||||
|
312
Rakefile
312
Rakefile
@@ -1,312 +0,0 @@
|
||||
$: << '../rake_utils/lib'
|
||||
require 'rake_utils/debian'
|
||||
include RakeUtils::DSL
|
||||
|
||||
CC=ENV['CC'] || "gcc"
|
||||
|
||||
DEBUG = ENV.has_key?('DEBUG') &&
|
||||
%w|yes y ok 1 true t|.include?(ENV['DEBUG'])
|
||||
|
||||
ALL_SOURCES = FileList['src/*']
|
||||
|
||||
PROXY_ONLY_SOURCES = FileList['src/{proxy-main,proxy}.c']
|
||||
PROXY_ONLY_OBJECTS = PROXY_ONLY_SOURCES.pathmap( "%{^src,build}X.o" )
|
||||
|
||||
SOURCES = ALL_SOURCES.select { |c| c =~ /\.c$/ } - PROXY_ONLY_SOURCES
|
||||
OBJECTS = SOURCES.pathmap( "%{^src,build}X.o" ) - PROXY_ONLY_OBJECTS
|
||||
|
||||
PROXY_SOURCES = FileList['src/{ioutil,nbdtypes,readwrite,sockutil,util,parse}.c'] + PROXY_ONLY_SOURCES
|
||||
PROXY_OBJECTS = PROXY_SOURCES.pathmap( "%{^src,build}X.o" )
|
||||
|
||||
TEST_SOURCES = FileList['tests/unit/*.c']
|
||||
TEST_OBJECTS = TEST_SOURCES.pathmap( "%{^tests/unit,build/tests}X.o" )
|
||||
|
||||
LIBS = %w( pthread )
|
||||
LDFLAGS = ["-lrt -lev"]
|
||||
CCFLAGS = %w(
|
||||
-D_GNU_SOURCE=1
|
||||
-Wall
|
||||
-Wextra
|
||||
-Werror-implicit-function-declaration
|
||||
-Wstrict-prototypes
|
||||
-Wno-missing-field-initializers
|
||||
) + # Added -Wno-missing-field-initializers to shut GCC up over {0} struct initialisers
|
||||
[ENV['CFLAGS']]
|
||||
|
||||
LIBCHECK = File.exists?("/usr/lib/libcheck.a") ?
|
||||
"/usr/lib/libcheck.a" :
|
||||
"/usr/local/lib/libcheck.a"
|
||||
|
||||
TEST_MODULES = Dir["tests/unit/check_*.c"].map { |n|
|
||||
File.basename( n )[%r{check_(.+)\.c},1] }
|
||||
|
||||
if DEBUG
|
||||
LDFLAGS << ["-g"]
|
||||
CCFLAGS << ["-g -DDEBUG"]
|
||||
else
|
||||
CCFLAGS << "-O2"
|
||||
end
|
||||
|
||||
desc "Build the binary and man page"
|
||||
task :build => [:flexnbd, :flexnbd_proxy, :man]
|
||||
task :default => :build
|
||||
|
||||
desc "Build just the flexnbd binary"
|
||||
task :flexnbd => "build/flexnbd"
|
||||
|
||||
desc "Build just the flexnbd-proxy binary"
|
||||
task :flexnbd_proxy => "build/flexnbd-proxy"
|
||||
|
||||
def check(m)
|
||||
"build/tests/check_#{m}"
|
||||
end
|
||||
|
||||
file "README.txt"
|
||||
file "README.proxy.txt"
|
||||
|
||||
def manpage(name, src)
|
||||
FileUtils.mkdir_p( "build" )
|
||||
sh "a2x --destination-dir build --format manpage #{src}"
|
||||
sh "gzip -f build/#{name}"
|
||||
end
|
||||
|
||||
file "build/flexnbd.1.gz" => "README.txt" do
|
||||
manpage("flexnbd.1", "README.txt")
|
||||
end
|
||||
|
||||
file "build/flexnbd-proxy.1.gz" => "README.proxy.txt" do
|
||||
manpage("flexnbd-proxy.1", "README.proxy.txt")
|
||||
end
|
||||
|
||||
desc "Build just the man page"
|
||||
task :man => ["build/flexnbd.1.gz", "build/flexnbd-proxy.1.gz"]
|
||||
|
||||
|
||||
namespace "test" do
|
||||
desc "Run all tests"
|
||||
task 'run' => ["unit", "scenarios"]
|
||||
|
||||
desc "Build C tests"
|
||||
task 'build' => TEST_MODULES.map { |n| check n}
|
||||
|
||||
TEST_MODULES.each do |m|
|
||||
desc "Run tests for #{m}"
|
||||
task "check_#{m}" => check(m) do
|
||||
sh check m
|
||||
end
|
||||
end
|
||||
|
||||
desc "Run C tests"
|
||||
task 'unit' => 'build' do
|
||||
TEST_MODULES.each do |n|
|
||||
ENV['EF_DISABLE_BANNER'] = '1'
|
||||
sh check n
|
||||
end
|
||||
end
|
||||
|
||||
desc "Run NBD test scenarios"
|
||||
task 'scenarios' => ['build/flexnbd', 'build/flexnbd-proxy'] do
|
||||
sh "cd tests/acceptance; ruby nbd_scenarios -v"
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
|
||||
def gcc_compile( target, source )
|
||||
FileUtils.mkdir_p File.dirname( target )
|
||||
sh "#{CC} -Isrc -c #{CCFLAGS.join(' ')} -o #{target} #{source} "
|
||||
end
|
||||
|
||||
def gcc_link(target, objects)
|
||||
FileUtils.mkdir_p File.dirname( target )
|
||||
|
||||
sh "#{CC} #{LDFLAGS.join(' ')} "+
|
||||
" -Isrc " +
|
||||
" -o #{target} "+
|
||||
objects.join(" ") +
|
||||
" "+LIBS.map { |l| "-l#{l}" }.join(" ")
|
||||
end
|
||||
|
||||
def headers(c)
|
||||
`#{CC} -Isrc -MM #{c}`.gsub("\\\n", " ").split(" ")[2..-1]
|
||||
end
|
||||
|
||||
rule 'build/flexnbd-proxy' => PROXY_OBJECTS do |t|
|
||||
gcc_link(t.name, t.sources)
|
||||
end
|
||||
|
||||
rule 'build/flexnbd' => OBJECTS do |t|
|
||||
gcc_link(t.name, t.sources)
|
||||
end
|
||||
|
||||
|
||||
file check("client") =>
|
||||
%w{build/tests/check_client.o
|
||||
build/self_pipe.o
|
||||
build/nbdtypes.o
|
||||
build/flexnbd.o
|
||||
build/flexthread.o
|
||||
build/control.o
|
||||
build/readwrite.o
|
||||
build/parse.o
|
||||
build/client.o
|
||||
build/serve.o
|
||||
build/acl.o
|
||||
build/ioutil.o
|
||||
build/mbox.o
|
||||
build/mirror.o
|
||||
build/status.o
|
||||
build/sockutil.o
|
||||
build/util.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
file check("acl") =>
|
||||
%w{build/tests/check_acl.o
|
||||
build/parse.o
|
||||
build/acl.o
|
||||
build/util.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
file check( "util" ) =>
|
||||
%w{build/tests/check_util.o
|
||||
build/util.o
|
||||
build/self_pipe.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
file check("serve") =>
|
||||
%w{build/tests/check_serve.o
|
||||
build/self_pipe.o
|
||||
build/nbdtypes.o
|
||||
build/control.o
|
||||
build/readwrite.o
|
||||
build/parse.o
|
||||
build/client.o
|
||||
build/flexthread.o
|
||||
build/serve.o
|
||||
build/flexnbd.o
|
||||
build/mirror.o
|
||||
build/status.o
|
||||
build/acl.o
|
||||
build/mbox.o
|
||||
build/ioutil.o
|
||||
build/sockutil.o
|
||||
build/util.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
file check("status") =>
|
||||
%w{
|
||||
build/tests/check_status.o
|
||||
build/self_pipe.o
|
||||
build/nbdtypes.o
|
||||
build/control.o
|
||||
build/readwrite.o
|
||||
build/parse.o
|
||||
build/client.o
|
||||
build/flexthread.o
|
||||
build/serve.o
|
||||
build/flexnbd.o
|
||||
build/mirror.o
|
||||
build/status.o
|
||||
build/acl.o
|
||||
build/mbox.o
|
||||
build/ioutil.o
|
||||
build/sockutil.o
|
||||
build/util.o
|
||||
} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
|
||||
file check("readwrite") =>
|
||||
%w{build/tests/check_readwrite.o
|
||||
build/readwrite.o
|
||||
build/client.o
|
||||
build/self_pipe.o
|
||||
build/serve.o
|
||||
build/parse.o
|
||||
build/acl.o
|
||||
build/flexthread.o
|
||||
build/control.o
|
||||
build/flexnbd.o
|
||||
build/mirror.o
|
||||
build/status.o
|
||||
build/nbdtypes.o
|
||||
build/mbox.o
|
||||
build/ioutil.o
|
||||
build/sockutil.o
|
||||
build/util.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
|
||||
file check("flexnbd") =>
|
||||
%w{build/tests/check_flexnbd.o
|
||||
build/flexnbd.o
|
||||
build/ioutil.o
|
||||
build/sockutil.o
|
||||
build/util.o
|
||||
build/control.o
|
||||
build/mbox.o
|
||||
build/flexthread.o
|
||||
build/status.o
|
||||
build/self_pipe.o
|
||||
build/client.o
|
||||
build/acl.o
|
||||
build/parse.o
|
||||
build/nbdtypes.o
|
||||
build/readwrite.o
|
||||
build/mirror.o
|
||||
build/serve.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
|
||||
file check("control") =>
|
||||
%w{build/tests/check_control.o} + OBJECTS - ["build/main.o", 'build/proxy-main.o', 'build/proxy.o'] do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
(TEST_MODULES- %w{status control flexnbd acl client serve readwrite util}).each do |m|
|
||||
tgt = "build/tests/check_#{m}.o"
|
||||
maybe_obj_name = "build/#{m}.o"
|
||||
# Take it out in case we're testing one of the utils
|
||||
deps = ["build/ioutil.o", "build/util.o", "build/sockutil.o"] - [maybe_obj_name]
|
||||
|
||||
# Add it back in if it's something we need to compile
|
||||
deps << maybe_obj_name if OBJECTS.include?( maybe_obj_name )
|
||||
|
||||
file check( m ) => deps + [tgt] do |t|
|
||||
gcc_link(t.name, deps + [tgt, LIBCHECK])
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
OBJECTS.zip( SOURCES ).each do |o,c|
|
||||
file o => [c]+headers(c) do |t| gcc_compile( o, c ) end
|
||||
end
|
||||
|
||||
PROXY_ONLY_OBJECTS.zip( PROXY_ONLY_SOURCES).each do |o, c|
|
||||
file o => [c]+headers(c) do |t| gcc_compile( o, c ) end
|
||||
end
|
||||
|
||||
TEST_OBJECTS.zip( TEST_SOURCES ).each do |o,c|
|
||||
file o => [c] + headers(c) do |t| gcc_compile( o, c ) end
|
||||
end
|
||||
|
||||
desc "Remove all build targets, binaries and temporary files"
|
||||
task :clean do
|
||||
sh "rm -rf *~ build"
|
||||
end
|
||||
|
||||
namespace :pkg do
|
||||
deb do |t|
|
||||
t.code_files = ALL_SOURCES + ["Rakefile", "README.txt", "README.proxy.txt"]
|
||||
t.pkg_name = "flexnbd"
|
||||
t.generate_changelog!
|
||||
end
|
||||
end
|
||||
|
2715
debian/changelog
vendored
2715
debian/changelog
vendored
File diff suppressed because it is too large
Load Diff
10
debian/control
vendored
10
debian/control
vendored
@@ -1,14 +1,14 @@
|
||||
Source: flexnbd
|
||||
Section: unknown
|
||||
Section: web
|
||||
Priority: extra
|
||||
Maintainer: Alex Young <alex@bytemark.co.uk>
|
||||
Build-Depends: cdbs, debhelper (>= 7.0.50), ruby, rake, gcc, libev-dev
|
||||
Maintainer: Patrick J Cherry <patrick@bytemark.co.uk>
|
||||
Build-Depends: debhelper (>= 7.0.50), ruby, gcc, libev-dev, txt2man, check, net-tools, libsubunit-dev, ruby-test-unit
|
||||
Standards-Version: 3.8.1
|
||||
Homepage: http://bigv.io/
|
||||
Homepage: https://github.com/BytemarkHosting/flexnbd-c
|
||||
|
||||
Package: flexnbd
|
||||
Architecture: any
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}, libev3
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}, libev4 | libev3
|
||||
Description: FlexNBD server
|
||||
An NBD server offering push-mirroring and intelligent sparse file handling
|
||||
|
||||
|
2
debian/flexnbd.install
vendored
2
debian/flexnbd.install
vendored
@@ -1,5 +1,3 @@
|
||||
build/flexnbd usr/bin
|
||||
build/flexnbd-proxy usr/bin
|
||||
build/flexnbd.1.gz usr/share/man/man1
|
||||
build/flexnbd-proxy.1.gz usr/share/man/man1
|
||||
|
||||
|
2
debian/flexnbd.manpages
vendored
Normal file
2
debian/flexnbd.manpages
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
build/flexnbd.1.gz
|
||||
build/flexnbd-proxy.1.gz
|
15
debian/rules
vendored
15
debian/rules
vendored
@@ -7,12 +7,13 @@
|
||||
%:
|
||||
dh $@
|
||||
|
||||
override_dh_auto_build:
|
||||
rake build
|
||||
|
||||
override_dh_auto_clean:
|
||||
rake clean
|
||||
|
||||
.PHONY: override_dh_strip
|
||||
override_dh_strip:
|
||||
dh_strip --dbg-package=flexnbd-dbg
|
||||
|
||||
#
|
||||
# TODO: The ruby test suites don't work during building in a chroot, so leave
|
||||
# them out for now.
|
||||
#
|
||||
#override_dh_auto_test:
|
||||
# rake test:run
|
||||
|
||||
|
@@ -31,8 +31,6 @@ int build_allocation_map(struct bitset * allocation_map, int fd)
|
||||
|
||||
for (offset = 0; offset < allocation_map->size; ) {
|
||||
|
||||
unsigned int i;
|
||||
|
||||
fiemap->fm_start = offset;
|
||||
|
||||
fiemap->fm_length = max_length;
|
||||
@@ -49,7 +47,7 @@ int build_allocation_map(struct bitset * allocation_map, int fd)
|
||||
return 0; /* it's up to the caller to free the map */
|
||||
}
|
||||
else {
|
||||
for ( i = 0; i < fiemap->fm_mapped_extents; i++ ) {
|
||||
for ( unsigned int i = 0; i < fiemap->fm_mapped_extents; i++ ) {
|
||||
bitset_set_range( allocation_map,
|
||||
fiemap->fm_extents[i].fe_logical,
|
||||
fiemap->fm_extents[i].fe_length );
|
||||
@@ -71,22 +69,23 @@ int build_allocation_map(struct bitset * allocation_map, int fd)
|
||||
}
|
||||
}
|
||||
|
||||
debug("Successfully built allocation map");
|
||||
info("Successfully built allocation map");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
int open_and_mmap(const char* filename, int* out_fd, off64_t *out_size, void **out_map)
|
||||
int open_and_mmap(const char* filename, int* out_fd, uint64_t *out_size, void **out_map)
|
||||
{
|
||||
/*
|
||||
* size and out_size are intentionally of different types.
|
||||
* lseek64() uses off64_t to signal errors in the sign bit.
|
||||
* Since we check for these errors before trying to assign to
|
||||
* *out_size, we know *out_size can never go negative.
|
||||
*/
|
||||
off64_t size;
|
||||
|
||||
/* O_DIRECT seems to be intermittently supported. Leaving it as
|
||||
* a compile-time option for now. */
|
||||
#ifdef DIRECT_IO
|
||||
*out_fd = open(filename, O_RDWR | O_DIRECT | O_SYNC );
|
||||
#else
|
||||
/* O_DIRECT should not be used with mmap() */
|
||||
*out_fd = open(filename, O_RDWR | O_SYNC );
|
||||
#endif
|
||||
|
||||
if (*out_fd < 1) {
|
||||
warn("open(%s) failed: does it exist?", filename);
|
||||
@@ -109,8 +108,11 @@ int open_and_mmap(const char* filename, int* out_fd, off64_t *out_size, void **o
|
||||
warn("mmap64() failed");
|
||||
return -1;
|
||||
}
|
||||
debug("opened %s size %ld on fd %d @ %p", filename, size, *out_fd, *out_map);
|
||||
}
|
||||
else {
|
||||
debug("opened %s size %ld on fd %d", filename, size, *out_fd);
|
||||
}
|
||||
debug("opened %s size %ld on fd %d @ %p", filename, size, *out_fd, *out_map);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -139,7 +141,7 @@ int readloop(int filedes, void *buffer, size_t size)
|
||||
ssize_t result = read(filedes, buffer+readden, size-readden);
|
||||
|
||||
if ( result == 0 /* EOF */ ) {
|
||||
warn( "end-of-file detected while reading" );
|
||||
warn( "end-of-file detected while reading after %i bytes", readden );
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -347,4 +349,3 @@ ssize_t iobuf_write( int fd, struct iobuf *iobuf )
|
||||
|
||||
return count;
|
||||
}
|
||||
|
@@ -65,7 +65,7 @@ int read_lines_until_blankline(int fd, int max_line_length, char ***lines);
|
||||
* ''out_size'' and the address of the mmap in ''out_map''. If anything goes
|
||||
* wrong, returns -1 setting errno, otherwise 0.
|
||||
*/
|
||||
int open_and_mmap( const char* filename, int* out_fd, off64_t *out_size, void **out_map);
|
||||
int open_and_mmap( const char* filename, int* out_fd, uint64_t* out_size, void **out_map);
|
||||
|
||||
|
||||
/** Check to see whether the given file descriptor is closed.
|
@@ -7,8 +7,9 @@ void mode(char* mode, int argc, char **argv);
|
||||
|
||||
#include <getopt.h>
|
||||
|
||||
#define GETOPT_ARG(x,s) {(x), 1, 0, (s)}
|
||||
#define GETOPT_FLAG(x,v) {(x), 0, 0, (v)}
|
||||
#define GETOPT_ARG(x,s) {(x), required_argument, 0, (s)}
|
||||
#define GETOPT_FLAG(x,v) {(x), no_argument, 0, (v)}
|
||||
#define GETOPT_OPTARG(x,s) {(x), optional_argument, 0, (s)}
|
||||
|
||||
#define OPT_HELP "help"
|
||||
#define OPT_ADDR "addr"
|
||||
@@ -19,6 +20,7 @@ void mode(char* mode, int argc, char **argv);
|
||||
#define OPT_FROM "from"
|
||||
#define OPT_SIZE "size"
|
||||
#define OPT_DENY "default-deny"
|
||||
#define OPT_CACHE "cache"
|
||||
#define OPT_UNLINK "unlink"
|
||||
#define OPT_CONNECT_ADDR "conn-addr"
|
||||
#define OPT_CONNECT_PORT "conn-port"
|
||||
@@ -52,6 +54,7 @@ void mode(char* mode, int argc, char **argv);
|
||||
#define GETOPT_FROM GETOPT_ARG( OPT_FROM, 'F' )
|
||||
#define GETOPT_SIZE GETOPT_ARG( OPT_SIZE, 'S' )
|
||||
#define GETOPT_BIND GETOPT_ARG( OPT_BIND, 'b' )
|
||||
#define GETOPT_CACHE GETOPT_OPTARG( OPT_CACHE, 'c' )
|
||||
#define GETOPT_UNLINK GETOPT_ARG( OPT_UNLINK, 'u' )
|
||||
#define GETOPT_CONNECT_ADDR GETOPT_ARG( OPT_CONNECT_ADDR, 'C' )
|
||||
#define GETOPT_CONNECT_PORT GETOPT_ARG( OPT_CONNECT_PORT, 'P' )
|
@@ -27,7 +27,7 @@ void nbd_r2h_request( struct nbd_request_raw *from, struct nbd_request * to )
|
||||
{
|
||||
to->magic = htobe32( from->magic );
|
||||
to->type = htobe32( from->type );
|
||||
memcpy( to->handle, from->handle, 8 );
|
||||
to->handle.w = from->handle.w;
|
||||
to->from = htobe64( from->from );
|
||||
to->len = htobe32( from->len );
|
||||
}
|
||||
@@ -36,7 +36,7 @@ void nbd_h2r_request( struct nbd_request * from, struct nbd_request_raw * to )
|
||||
{
|
||||
to->magic = be32toh( from->magic );
|
||||
to->type = be32toh( from->type );
|
||||
memcpy( to->handle, from->handle, 8 );
|
||||
to->handle.w = from->handle.w;
|
||||
to->from = be64toh( from->from );
|
||||
to->len = be32toh( from->len );
|
||||
}
|
||||
@@ -46,13 +46,13 @@ void nbd_r2h_reply( struct nbd_reply_raw * from, struct nbd_reply * to )
|
||||
{
|
||||
to->magic = htobe32( from->magic );
|
||||
to->error = htobe32( from->error );
|
||||
memcpy( to->handle, from->handle, 8 );
|
||||
to->handle.w = from->handle.w;
|
||||
}
|
||||
|
||||
void nbd_h2r_reply( struct nbd_reply * from, struct nbd_reply_raw * to )
|
||||
{
|
||||
to->magic = be32toh( from->magic );
|
||||
to->error = be32toh( from->error );
|
||||
memcpy( to->handle, from->handle, 8 );
|
||||
to->handle.w = from->handle.w;
|
||||
}
|
||||
|
@@ -16,7 +16,7 @@
|
||||
|
||||
|
||||
/* 1MiB is the de-facto standard for maximum size of header + data */
|
||||
#define NBD_MAX_SIZE ( 1024 * 1024 )
|
||||
#define NBD_MAX_SIZE ( 32 * 1024 * 1024 )
|
||||
|
||||
#define NBD_REQUEST_SIZE ( sizeof( struct nbd_request_raw ) )
|
||||
#define NBD_REPLY_SIZE ( sizeof( struct nbd_reply_raw ) )
|
||||
@@ -24,6 +24,11 @@
|
||||
#include <linux/types.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
typedef union nbd_handle_t {
|
||||
uint8_t b[8];
|
||||
uint64_t w;
|
||||
} nbd_handle_t;
|
||||
|
||||
/* The _raw types are the types as they appear on the wire. Non-_raw
|
||||
* types are in host-format.
|
||||
* Conversion functions are _r2h_ for converting raw to host, and _h2r_
|
||||
@@ -39,7 +44,7 @@ struct nbd_init_raw {
|
||||
struct nbd_request_raw {
|
||||
__be32 magic;
|
||||
__be32 type; /* == READ || == WRITE */
|
||||
char handle[8];
|
||||
nbd_handle_t handle;
|
||||
__be64 from;
|
||||
__be32 len;
|
||||
} __attribute__((packed));
|
||||
@@ -47,7 +52,7 @@ struct nbd_request_raw {
|
||||
struct nbd_reply_raw {
|
||||
__be32 magic;
|
||||
__be32 error; /* 0 = ok, else error */
|
||||
char handle[8]; /* handle you got from request */
|
||||
nbd_handle_t handle; /* handle you got from request */
|
||||
};
|
||||
|
||||
|
||||
@@ -62,7 +67,7 @@ struct nbd_init {
|
||||
struct nbd_request {
|
||||
uint32_t magic;
|
||||
uint32_t type; /* == READ || == WRITE || == DISCONNECT */
|
||||
char handle[8];
|
||||
nbd_handle_t handle;
|
||||
uint64_t from;
|
||||
uint32_t len;
|
||||
} __attribute__((packed));
|
||||
@@ -70,7 +75,7 @@ struct nbd_request {
|
||||
struct nbd_reply {
|
||||
uint32_t magic;
|
||||
uint32_t error; /* 0 = ok, else error */
|
||||
char handle[8]; /* handle you got from request */
|
||||
nbd_handle_t handle; /* handle you got from request */
|
||||
};
|
||||
|
||||
void nbd_r2h_init( struct nbd_init_raw * from, struct nbd_init * to );
|
@@ -41,7 +41,7 @@ int socket_connect(struct sockaddr* to, struct sockaddr* from)
|
||||
return fd;
|
||||
}
|
||||
|
||||
int nbd_check_hello( struct nbd_init_raw* init_raw, off64_t* out_size )
|
||||
int nbd_check_hello( struct nbd_init_raw* init_raw, uint64_t* out_size )
|
||||
{
|
||||
if ( strncmp( init_raw->passwd, INIT_PASSWD, 8 ) != 0 ) {
|
||||
warn( "wrong passwd" );
|
||||
@@ -62,7 +62,7 @@ fail:
|
||||
|
||||
}
|
||||
|
||||
int socket_nbd_read_hello( int fd, off64_t* out_size )
|
||||
int socket_nbd_read_hello( int fd, uint64_t* out_size )
|
||||
{
|
||||
struct nbd_init_raw init_raw;
|
||||
|
||||
@@ -101,12 +101,11 @@ int socket_nbd_write_hello(int fd, off64_t out_size)
|
||||
return 1;
|
||||
}
|
||||
|
||||
void fill_request(struct nbd_request *request, int type, off64_t from, int len)
|
||||
void fill_request(struct nbd_request *request, int type, uint64_t from, uint32_t len)
|
||||
{
|
||||
request->magic = htobe32(REQUEST_MAGIC);
|
||||
request->type = htobe32(type);
|
||||
((int*) request->handle)[0] = rand();
|
||||
((int*) request->handle)[1] = rand();
|
||||
request->handle.w = (((uint64_t)rand()) << 32) | ((uint64_t)rand());
|
||||
request->from = htobe64(from);
|
||||
request->len = htobe32(len);
|
||||
}
|
||||
@@ -126,7 +125,7 @@ void read_reply(int fd, struct nbd_request *request, struct nbd_reply *reply)
|
||||
if (reply->error != 0) {
|
||||
error("Server replied with error %d", reply->error);
|
||||
}
|
||||
if (strncmp(request->handle, reply->handle, 8) != 0) {
|
||||
if (request->handle.w != reply->handle.w) {
|
||||
error("Did not reply with correct handle");
|
||||
}
|
||||
}
|
||||
@@ -149,7 +148,7 @@ void wait_for_data( int fd, int timeout_secs )
|
||||
}
|
||||
|
||||
|
||||
void socket_nbd_read(int fd, off64_t from, int len, int out_fd, void* out_buf, int timeout_secs)
|
||||
void socket_nbd_read(int fd, uint64_t from, uint32_t len, int out_fd, void* out_buf, int timeout_secs)
|
||||
{
|
||||
struct nbd_request request;
|
||||
struct nbd_reply reply;
|
||||
@@ -173,7 +172,7 @@ void socket_nbd_read(int fd, off64_t from, int len, int out_fd, void* out_buf, i
|
||||
}
|
||||
}
|
||||
|
||||
void socket_nbd_write(int fd, off64_t from, int len, int in_fd, void* in_buf, int timeout_secs)
|
||||
void socket_nbd_write(int fd, uint64_t from, uint32_t len, int in_fd, void* in_buf, int timeout_secs)
|
||||
{
|
||||
struct nbd_request request;
|
||||
struct nbd_reply reply;
|
||||
@@ -213,10 +212,12 @@ int socket_nbd_disconnect( int fd )
|
||||
}
|
||||
|
||||
#define CHECK_RANGE(error_type) { \
|
||||
off64_t size;\
|
||||
uint64_t size;\
|
||||
int success = socket_nbd_read_hello(params->client, &size); \
|
||||
if ( success ) {\
|
||||
if (params->from < 0 || (params->from + params->len) > size) {\
|
||||
uint64_t endpoint = params->from + params->len; \
|
||||
if (endpoint > size || \
|
||||
endpoint < params->from ) { /* this happens on overflow */ \
|
||||
fatal(error_type \
|
||||
" request %d+%d is out of range given size %d", \
|
||||
params->from, params->len, size\
|
23
src/common/readwrite.h
Normal file
23
src/common/readwrite.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#ifndef READWRITE_H
|
||||
|
||||
#define READWRITE_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include "nbdtypes.h"
|
||||
|
||||
int socket_connect(struct sockaddr* to, struct sockaddr* from);
|
||||
int socket_nbd_read_hello(int fd, uint64_t* size);
|
||||
int socket_nbd_write_hello(int fd, uint64_t size);
|
||||
void socket_nbd_read(int fd, uint64_t from, uint32_t len, int out_fd, void* out_buf, int timeout_secs);
|
||||
void socket_nbd_write(int fd, uint64_t from, uint32_t len, int out_fd, void* out_buf, int timeout_secs);
|
||||
int socket_nbd_disconnect( int fd );
|
||||
|
||||
/* as you can see, we're slowly accumulating code that should really be in an
|
||||
* NBD library */
|
||||
|
||||
void nbd_hello_to_buf( struct nbd_init_raw* buf, uint64_t out_size );
|
||||
int nbd_check_hello( struct nbd_init_raw* init_raw, uint64_t* out_size );
|
||||
|
||||
#endif
|
||||
|
@@ -63,7 +63,5 @@ void do_remote_command(char* command, char* socket_name, int argc, char** argv)
|
||||
print_response( response );
|
||||
|
||||
exit(atoi(response));
|
||||
|
||||
close(remote);
|
||||
}
|
||||
|
@@ -51,7 +51,6 @@ struct self_pipe * self_pipe_create(void)
|
||||
{
|
||||
struct self_pipe *sig = xmalloc( sizeof( struct self_pipe ) );
|
||||
int fds[2];
|
||||
int fcntl_err;
|
||||
|
||||
if ( NULL == sig ) { return NULL; }
|
||||
|
||||
@@ -62,7 +61,7 @@ struct self_pipe * self_pipe_create(void)
|
||||
}
|
||||
|
||||
if ( fcntl( fds[0], F_SETFL, O_NONBLOCK ) || fcntl( fds[1], F_SETFL, O_NONBLOCK ) ) {
|
||||
fcntl_err = errno;
|
||||
int fcntl_err = errno;
|
||||
while( close( fds[0] ) == -1 && errno == EINTR );
|
||||
while( close( fds[1] ) == -1 && errno == EINTR );
|
||||
free( sig );
|
@@ -39,7 +39,6 @@ const char* sockaddr_address_string( const struct sockaddr* sa, char* dest, size
|
||||
struct sockaddr_un* un = ( struct sockaddr_un* ) sa;
|
||||
|
||||
unsigned short real_port = ntohs( in->sin_port ); // common to in and in6
|
||||
size_t size;
|
||||
const char* ret = NULL;
|
||||
|
||||
memset( dest, 0, len );
|
||||
@@ -57,7 +56,7 @@ const char* sockaddr_address_string( const struct sockaddr* sa, char* dest, size
|
||||
}
|
||||
|
||||
if ( NULL != ret && real_port > 0 && sa->sa_family != AF_UNIX ) {
|
||||
size = strlen( dest );
|
||||
size_t size = strlen( dest );
|
||||
snprintf( dest + size, len - size, " port %d", real_port );
|
||||
}
|
||||
|
||||
@@ -69,12 +68,48 @@ int sock_set_reuseaddr( int fd, int optval )
|
||||
return setsockopt( fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval) );
|
||||
}
|
||||
|
||||
int sock_set_keepalive_params( int fd, int time, int intvl, int probes)
|
||||
{
|
||||
if (sock_set_keepalive(fd, 1) ||
|
||||
sock_set_tcp_keepidle(fd, time) ||
|
||||
sock_set_tcp_keepintvl(fd, intvl) ||
|
||||
sock_set_tcp_keepcnt(fd, probes)) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sock_set_keepalive( int fd, int optval )
|
||||
{
|
||||
return setsockopt( fd, SOL_SOCKET, SO_KEEPALIVE, &optval, sizeof(optval) );
|
||||
}
|
||||
|
||||
int sock_set_tcp_keepidle( int fd, int optval )
|
||||
{
|
||||
return setsockopt( fd, IPPROTO_TCP, TCP_KEEPIDLE, &optval, sizeof(optval) );
|
||||
}
|
||||
|
||||
int sock_set_tcp_keepintvl( int fd, int optval )
|
||||
{
|
||||
return setsockopt( fd, IPPROTO_TCP, TCP_KEEPINTVL, &optval, sizeof(optval) );
|
||||
}
|
||||
|
||||
int sock_set_tcp_keepcnt( int fd, int optval )
|
||||
{
|
||||
return setsockopt( fd, IPPROTO_TCP, TCP_KEEPCNT, &optval, sizeof(optval) );
|
||||
}
|
||||
|
||||
/* Set the tcp_nodelay option */
|
||||
int sock_set_tcp_nodelay( int fd, int optval )
|
||||
{
|
||||
return setsockopt( fd, IPPROTO_TCP, TCP_NODELAY, &optval, sizeof(optval) );
|
||||
}
|
||||
|
||||
int sock_set_tcp_cork( int fd, int optval )
|
||||
{
|
||||
return setsockopt( fd, IPPROTO_TCP, TCP_CORK, &optval, sizeof(optval) );
|
||||
}
|
||||
|
||||
int sock_set_nonblock( int fd, int optval )
|
||||
{
|
||||
int flags = fcntl( fd, F_GETFL );
|
||||
@@ -96,7 +131,7 @@ int sock_try_bind( int fd, const struct sockaddr* sa )
|
||||
{
|
||||
int bind_result;
|
||||
char s_address[256];
|
||||
int retry = 1;
|
||||
int retry = 10;
|
||||
|
||||
sockaddr_address_string( sa, &s_address[0], 256 );
|
||||
|
||||
@@ -122,8 +157,11 @@ int sock_try_bind( int fd, const struct sockaddr* sa )
|
||||
* will cope with it.
|
||||
*/
|
||||
case EADDRNOTAVAIL:
|
||||
debug( "retrying" );
|
||||
sleep( 1 );
|
||||
retry--;
|
||||
if (retry) {
|
||||
debug( "retrying" );
|
||||
sleep( 1 );
|
||||
}
|
||||
continue;
|
||||
case EADDRINUSE:
|
||||
warn( "%s in use, giving up.", s_address );
|
@@ -14,14 +14,29 @@ size_t sockaddr_size(const struct sockaddr* sa);
|
||||
*/
|
||||
const char* sockaddr_address_string(const struct sockaddr* sa, char* dest, size_t len);
|
||||
|
||||
/* Configure TCP keepalive on a socket */
|
||||
int sock_set_keepalive_params( int fd, int time, int intvl, int probes);
|
||||
|
||||
/* Set the SO_KEEPALIVE option */
|
||||
int sock_set_keepalive(int fd, int optval);
|
||||
|
||||
/* Set the SO_REUSEADDR option */
|
||||
int sock_set_reuseaddr(int fd, int optval);
|
||||
|
||||
/* Set the tcp_keepidle option */
|
||||
int sock_set_tcp_keepidle(int fd, int optval);
|
||||
|
||||
/* Set the tcp_keepintvl option */
|
||||
int sock_set_tcp_keepintvl(int fd, int optval);
|
||||
|
||||
/* Set the tcp_keepcnt option */
|
||||
int sock_set_tcp_keepcnt(int fd, int optval);
|
||||
|
||||
/* Set the tcp_nodelay option */
|
||||
int sock_set_tcp_nodelay(int fd, int optval);
|
||||
|
||||
/* TODO: Set the tcp_cork option */
|
||||
// int sock_set_cork(int fd, int optval);
|
||||
/* Set the tcp_cork option */
|
||||
int sock_set_tcp_cork(int fd, int optval);
|
||||
|
||||
int sock_set_nonblock(int fd, int optval);
|
||||
|
@@ -13,6 +13,7 @@
|
||||
pthread_key_t cleanup_handler_key;
|
||||
|
||||
int log_level = 2;
|
||||
char *log_context = "";
|
||||
|
||||
void error_init(void)
|
||||
{
|
@@ -21,6 +21,9 @@ extern int log_level;
|
||||
/* set up the error globals */
|
||||
void error_init(void);
|
||||
|
||||
/* some context for the overall process that appears on each log line */
|
||||
extern char *log_context;
|
||||
|
||||
|
||||
void exit_err( const char * );
|
||||
|
||||
@@ -92,7 +95,7 @@ uint64_t monotonic_time_ms(void);
|
||||
|
||||
#define levstr(i) (i==0?'D':(i==1?'I':(i==2?'W':(i==3?'E':'F'))))
|
||||
|
||||
#define myloglev(level, msg, ...) mylog( level, "%"PRIu64":%c:%d %p %s:%d: "msg"\n", monotonic_time_ms(), levstr(level), getpid(),pthread_self(), __FILE__, __LINE__, ##__VA_ARGS__ )
|
||||
#define myloglev(level, msg, ...) mylog( level, "%"PRIu64":%c:%d %p %s %s:%d: "msg"\n", monotonic_time_ms(), levstr(level), getpid(),pthread_self(), log_context, __FILE__, __LINE__, ##__VA_ARGS__ )
|
||||
|
||||
#ifdef DEBUG
|
||||
# define debug(msg, ...) myloglev(0, msg, ##__VA_ARGS__)
|
||||
@@ -116,6 +119,7 @@ uint64_t monotonic_time_ms(void);
|
||||
#define fatal(msg, ...) do { \
|
||||
myloglev(4, msg, ##__VA_ARGS__); \
|
||||
error_handler(1); \
|
||||
exit(1); /* never-reached, this is to make static code analyzer happy */ \
|
||||
} while(0)
|
||||
|
||||
|
@@ -2,12 +2,16 @@
|
||||
#include "mode.h"
|
||||
|
||||
#include <signal.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
signal(SIGPIPE, SIG_IGN); /* calls to splice() unhelpfully throw this */
|
||||
error_init();
|
||||
|
||||
srand(time(NULL));
|
||||
|
||||
if (argc < 2) {
|
||||
exit_err( help_help_text );
|
||||
}
|
||||
|
@@ -1,14 +0,0 @@
|
||||
#ifndef PREFETCH_H
|
||||
#define PREFETCH_H
|
||||
|
||||
#define PREFETCH_BUFSIZE 4096
|
||||
|
||||
struct prefetch {
|
||||
int is_full;
|
||||
__be64 from;
|
||||
__be32 len;
|
||||
|
||||
char buffer[PREFETCH_BUFSIZE];
|
||||
};
|
||||
|
||||
#endif
|
@@ -1,4 +1,6 @@
|
||||
#include <signal.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "mode.h"
|
||||
#include "util.h"
|
||||
@@ -12,6 +14,7 @@ static struct option proxy_options[] = {
|
||||
GETOPT_CONNECT_ADDR,
|
||||
GETOPT_CONNECT_PORT,
|
||||
GETOPT_BIND,
|
||||
GETOPT_CACHE,
|
||||
GETOPT_QUIET,
|
||||
GETOPT_VERBOSE,
|
||||
{0}
|
||||
@@ -27,22 +30,25 @@ static char proxy_help_text[] =
|
||||
"\t--" OPT_CONNECT_ADDR ",-C <ADDR>\tAddress of the proxied server.\n"
|
||||
"\t--" OPT_CONNECT_PORT ",-P <PORT>\tPort of the proxied server.\n"
|
||||
"\t--" OPT_BIND ",-b <ADDR>\tThe address we connect from, as a proxy.\n"
|
||||
"\t--" OPT_CACHE ",-c[=<CACHE-BYTES>]\tUse a RAM read cache of the given size.\n"
|
||||
QUIET_LINE
|
||||
VERBOSE_LINE;
|
||||
|
||||
static char proxy_default_cache_size[] = "4096";
|
||||
|
||||
void read_proxy_param(
|
||||
int c,
|
||||
char **downstream_addr,
|
||||
char **downstream_port,
|
||||
char **upstream_addr,
|
||||
char **upstream_port,
|
||||
char **bind_addr )
|
||||
char **bind_addr,
|
||||
char **cache_bytes)
|
||||
{
|
||||
switch( c ) {
|
||||
case 'h' :
|
||||
fprintf( stdout, "%s\n", proxy_help_text );
|
||||
exit( 0 );
|
||||
break;
|
||||
case 'l':
|
||||
*downstream_addr = optarg;
|
||||
break;
|
||||
@@ -58,6 +64,9 @@ void read_proxy_param(
|
||||
case 'b':
|
||||
*bind_addr = optarg;
|
||||
break;
|
||||
case 'c':
|
||||
*cache_bytes = optarg ? optarg : proxy_default_cache_size;
|
||||
break;
|
||||
case 'q':
|
||||
log_level = QUIET_LOG_LEVEL;
|
||||
break;
|
||||
@@ -89,6 +98,7 @@ int main( int argc, char *argv[] )
|
||||
char *upstream_addr = NULL;
|
||||
char *upstream_port = NULL;
|
||||
char *bind_addr = NULL;
|
||||
char *cache_bytes = NULL;
|
||||
int success;
|
||||
|
||||
sigset_t mask;
|
||||
@@ -103,6 +113,8 @@ int main( int argc, char *argv[] )
|
||||
exit_action.sa_mask = mask;
|
||||
exit_action.sa_flags = 0;
|
||||
|
||||
srand(time(NULL));
|
||||
|
||||
while (1) {
|
||||
c = getopt_long( argc, argv, proxy_short_options, proxy_options, NULL );
|
||||
if ( -1 == c ) { break; }
|
||||
@@ -111,7 +123,8 @@ int main( int argc, char *argv[] )
|
||||
&downstream_port,
|
||||
&upstream_addr,
|
||||
&upstream_port,
|
||||
&bind_addr
|
||||
&bind_addr,
|
||||
&cache_bytes
|
||||
);
|
||||
}
|
||||
|
||||
@@ -128,7 +141,8 @@ int main( int argc, char *argv[] )
|
||||
downstream_port,
|
||||
upstream_addr,
|
||||
upstream_port,
|
||||
bind_addr
|
||||
bind_addr,
|
||||
cache_bytes
|
||||
);
|
||||
|
||||
/* Set these *after* proxy has been assigned to */
|
||||
|
68
src/proxy/prefetch.c
Normal file
68
src/proxy/prefetch.c
Normal file
@@ -0,0 +1,68 @@
|
||||
#include "prefetch.h"
|
||||
#include "util.h"
|
||||
|
||||
|
||||
struct prefetch* prefetch_create( size_t size_bytes ){
|
||||
|
||||
struct prefetch* out = xmalloc( sizeof( struct prefetch ) );
|
||||
NULLCHECK( out );
|
||||
|
||||
out->buffer = xmalloc( size_bytes );
|
||||
NULLCHECK( out->buffer );
|
||||
|
||||
out->size = size_bytes;
|
||||
out->is_full = 0;
|
||||
out->from = 0;
|
||||
out->len = 0;
|
||||
|
||||
return out;
|
||||
|
||||
}
|
||||
|
||||
void prefetch_destroy( struct prefetch *prefetch ) {
|
||||
if( prefetch ) {
|
||||
free( prefetch->buffer );
|
||||
free( prefetch );
|
||||
}
|
||||
}
|
||||
|
||||
size_t prefetch_size( struct prefetch *prefetch){
|
||||
if ( prefetch ) {
|
||||
return prefetch->size;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Mark the cache as holding no data (clears the is_full flag). */
void prefetch_set_is_empty( struct prefetch *prefetch )
{
	prefetch_set_full( prefetch, 0 );
}
|
||||
|
||||
/* Mark the cache as holding data (sets the is_full flag). */
void prefetch_set_is_full( struct prefetch *prefetch )
{
	prefetch_set_full( prefetch, 1 );
}
|
||||
|
||||
void prefetch_set_full( struct prefetch *prefetch, int val ){
|
||||
if( prefetch ) {
|
||||
prefetch->is_full = val;
|
||||
}
|
||||
}
|
||||
|
||||
int prefetch_is_full( struct prefetch *prefetch ){
|
||||
if( prefetch ) {
|
||||
return prefetch->is_full;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int prefetch_contains( struct prefetch *prefetch, uint64_t from, uint32_t len ){
|
||||
NULLCHECK( prefetch );
|
||||
return from >= prefetch->from &&
|
||||
from + len <= prefetch->from + prefetch->len;
|
||||
}
|
||||
|
||||
char *prefetch_offset( struct prefetch *prefetch, uint64_t from ){
|
||||
NULLCHECK( prefetch );
|
||||
return prefetch->buffer + (from - prefetch->from);
|
||||
}
|
33
src/proxy/prefetch.h
Normal file
33
src/proxy/prefetch.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#ifndef PREFETCH_H
|
||||
#define PREFETCH_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#define PREFETCH_BUFSIZE 4096
|
||||
|
||||
struct prefetch {
	/* Non-zero while the buffer holds data */
	int is_full;
	/* Start point of the data currently held in buffer */
	uint64_t from;
	/* Number of bytes of data currently held in buffer */
	uint32_t len;

	/* Capacity of buffer, in bytes */
	size_t size;

	/* Storage for the prefetched data */
	char *buffer;
};
|
||||
|
||||
struct prefetch* prefetch_create( size_t size_bytes );
|
||||
void prefetch_destroy( struct prefetch *prefetch );
|
||||
size_t prefetch_size( struct prefetch *);
|
||||
void prefetch_set_is_empty( struct prefetch *prefetch );
|
||||
void prefetch_set_is_full( struct prefetch *prefetch );
|
||||
void prefetch_set_full( struct prefetch *prefetch, int val );
|
||||
int prefetch_is_full( struct prefetch *prefetch );
|
||||
int prefetch_contains( struct prefetch *prefetch, uint64_t from, uint32_t len );
|
||||
char *prefetch_offset( struct prefetch *prefetch, uint64_t from );
|
||||
|
||||
#endif
|
@@ -1,9 +1,7 @@
|
||||
#include "proxy.h"
|
||||
#include "readwrite.h"
|
||||
|
||||
#ifdef PREFETCH
|
||||
#include "prefetch.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "ioutil.h"
|
||||
@@ -20,7 +18,8 @@ struct proxier* proxy_create(
|
||||
char* s_downstream_port,
|
||||
char* s_upstream_address,
|
||||
char* s_upstream_port,
|
||||
char* s_upstream_bind )
|
||||
char* s_upstream_bind,
|
||||
char* s_cache_bytes )
|
||||
{
|
||||
struct proxier* out;
|
||||
out = xmalloc( sizeof( struct proxier ) );
|
||||
@@ -65,31 +64,53 @@ struct proxier* proxy_create(
|
||||
out->downstream_fd = -1;
|
||||
out->upstream_fd = -1;
|
||||
|
||||
#ifdef PREFETCH
|
||||
out->prefetch = xmalloc( sizeof( struct prefetch ) );
|
||||
#endif
|
||||
out->prefetch = NULL;
|
||||
if ( s_cache_bytes ){
|
||||
int cache_bytes = atoi( s_cache_bytes );
|
||||
/* leaving this off or setting a cache size of zero or
|
||||
* less results in no cache.
|
||||
*/
|
||||
if ( cache_bytes >= 0 ) {
|
||||
out->prefetch = prefetch_create( cache_bytes );
|
||||
}
|
||||
}
|
||||
|
||||
out->init.buf = xmalloc( sizeof( struct nbd_init_raw ) );
|
||||
out->req.buf = xmalloc( NBD_MAX_SIZE );
|
||||
out->rsp.buf = xmalloc( NBD_MAX_SIZE );
|
||||
|
||||
/* Add on the request / reply size to our malloc to accommodate both
|
||||
* the struct and the data
|
||||
*/
|
||||
out->req.buf = xmalloc( NBD_MAX_SIZE + NBD_REQUEST_SIZE );
|
||||
out->rsp.buf = xmalloc( NBD_MAX_SIZE + NBD_REPLY_SIZE );
|
||||
|
||||
log_context = xmalloc( strlen(s_upstream_address) + strlen(s_upstream_port) + 2 );
|
||||
sprintf(log_context, "%s:%s", s_upstream_address, s_upstream_port);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
int proxy_prefetches( struct proxier* proxy ) {
|
||||
NULLCHECK( proxy );
|
||||
return proxy->prefetch != NULL;
|
||||
}
|
||||
|
||||
int proxy_prefetch_bufsize( struct proxier* proxy ){
|
||||
NULLCHECK( proxy );
|
||||
return prefetch_size( proxy->prefetch );
|
||||
}
|
||||
|
||||
void proxy_destroy( struct proxier* proxy )
|
||||
{
|
||||
free( proxy->init.buf );
|
||||
free( proxy->req.buf );
|
||||
free( proxy->rsp.buf );
|
||||
#ifdef PREFETCH
|
||||
free( proxy->prefetch );
|
||||
#endif
|
||||
prefetch_destroy( proxy->prefetch );
|
||||
|
||||
free( proxy );
|
||||
}
|
||||
|
||||
/* Shared between our two different connect_to_upstream paths */
|
||||
void proxy_finish_connect_to_upstream( struct proxier *proxy, off64_t size );
|
||||
void proxy_finish_connect_to_upstream( struct proxier *proxy, uint64_t size );
|
||||
|
||||
/* Try to establish a connection to our upstream server. Return 1 on success,
|
||||
* 0 on failure. this is a blocking call that returns a non-blocking socket.
|
||||
@@ -102,7 +123,7 @@ int proxy_connect_to_upstream( struct proxier* proxy )
|
||||
}
|
||||
|
||||
int fd = socket_connect( &proxy->connect_to.generic, connect_from );
|
||||
off64_t size = 0;
|
||||
uint64_t size = 0;
|
||||
|
||||
if ( -1 == fd ) {
|
||||
return 0;
|
||||
@@ -174,7 +195,7 @@ error:
|
||||
return;
|
||||
}
|
||||
|
||||
void proxy_finish_connect_to_upstream( struct proxier *proxy, off64_t size ) {
|
||||
void proxy_finish_connect_to_upstream( struct proxier *proxy, uint64_t size ) {
|
||||
|
||||
if ( proxy->upstream_size == 0 ) {
|
||||
info( "Size of upstream image is %"PRIu64" bytes", size );
|
||||
@@ -186,6 +207,13 @@ void proxy_finish_connect_to_upstream( struct proxier *proxy, off64_t size ) {
|
||||
}
|
||||
|
||||
proxy->upstream_size = size;
|
||||
|
||||
if ( AF_UNIX != proxy->connect_to.family ) {
|
||||
if ( sock_set_tcp_nodelay( proxy->upstream_fd, 1 ) == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to set TCP_NODELAY" ) );
|
||||
}
|
||||
}
|
||||
|
||||
info( "Connected to upstream on fd %i", proxy->upstream_fd );
|
||||
|
||||
return;
|
||||
@@ -272,10 +300,9 @@ static inline int proxy_state_upstream( int state )
|
||||
state == WRITE_TO_UPSTREAM || state == READ_FROM_UPSTREAM;
|
||||
}
|
||||
|
||||
#ifdef PREFETCH
|
||||
|
||||
int proxy_prefetch_for_request( struct proxier* proxy, int state )
|
||||
{
|
||||
NULLCHECK( proxy );
|
||||
struct nbd_request* req = &proxy->req_hdr;
|
||||
struct nbd_reply* rsp = &proxy->rsp_hdr;
|
||||
|
||||
@@ -284,23 +311,11 @@ int proxy_prefetch_for_request( struct proxier* proxy, int state )
|
||||
|
||||
int is_read = ( req->type & REQUEST_MASK ) == REQUEST_READ;
|
||||
|
||||
int prefetch_start = req->from;
|
||||
int prefetch_end = req->from + ( req->len * 2 );
|
||||
|
||||
/* We only want to consider prefetching if we know we're not
|
||||
* getting too much data back, if it's a read request, and if
|
||||
* the prefetch won't try to read past the end of the file.
|
||||
*/
|
||||
|
||||
int prefetching = req->len <= PREFETCH_BUFSIZE && is_read &&
|
||||
prefetch_start < prefetch_end && prefetch_end <= proxy->upstream_size;
|
||||
|
||||
if ( is_read ) {
|
||||
/* See if we can respond with what's in our prefetch
|
||||
* cache */
|
||||
if ( proxy->prefetch->is_full &&
|
||||
req->from == proxy->prefetch->from &&
|
||||
req->len == proxy->prefetch->len ) {
|
||||
if ( prefetch_is_full( proxy->prefetch ) &&
|
||||
prefetch_contains( proxy->prefetch, req->from, req->len ) ) {
|
||||
/* HUZZAH! A match! */
|
||||
debug( "Prefetch hit!" );
|
||||
|
||||
@@ -315,10 +330,11 @@ int proxy_prefetch_for_request( struct proxier* proxy, int state )
|
||||
/* and the data */
|
||||
memcpy(
|
||||
proxy->rsp.buf + NBD_REPLY_SIZE,
|
||||
proxy->prefetch->buffer, proxy->prefetch->len
|
||||
prefetch_offset( proxy->prefetch, req->from ),
|
||||
req->len
|
||||
);
|
||||
|
||||
proxy->rsp.size = NBD_REPLY_SIZE + proxy->prefetch->len;
|
||||
proxy->rsp.size = NBD_REPLY_SIZE + req->len;
|
||||
proxy->rsp.needle = 0;
|
||||
|
||||
/* return early, our work here is done */
|
||||
@@ -332,11 +348,24 @@ int proxy_prefetch_for_request( struct proxier* proxy, int state )
|
||||
* whether we can keep it or not.
|
||||
*/
|
||||
debug( "Blowing away prefetch cache on type %d request.", req->type );
|
||||
proxy->prefetch->is_full = 0;
|
||||
prefetch_set_is_empty( proxy->prefetch );
|
||||
}
|
||||
|
||||
debug( "Prefetch cache MISS!");
|
||||
|
||||
uint64_t prefetch_start = req->from;
|
||||
/* We prefetch what we expect to be the next request. */
|
||||
uint64_t prefetch_end = req->from + ( req->len * 2 );
|
||||
|
||||
/* We only want to consider prefetching if we know we're not
|
||||
* getting too much data back, if it's a read request, and if
|
||||
* the prefetch won't try to read past the end of the file.
|
||||
*/
|
||||
int prefetching =
|
||||
req->len <= prefetch_size( proxy->prefetch ) &&
|
||||
is_read &&
|
||||
prefetch_start < prefetch_end &&
|
||||
prefetch_end <= proxy->upstream_size;
|
||||
|
||||
/* We pull the request out of the proxy struct, rewrite the
|
||||
* request size, and write it back.
|
||||
@@ -347,7 +376,8 @@ int proxy_prefetch_for_request( struct proxier* proxy, int state )
|
||||
|
||||
req->len *= 2;
|
||||
|
||||
debug( "Prefetching %"PRIu32" bytes", req->len - proxy->prefetch_req_orig_len );
|
||||
debug( "Prefetching additional %"PRIu32" bytes",
|
||||
req->len - proxy->prefetch_req_orig_len );
|
||||
nbd_h2r_request( req, req_raw );
|
||||
}
|
||||
|
||||
@@ -364,10 +394,10 @@ int proxy_prefetch_for_reply( struct proxier* proxy, int state )
|
||||
|
||||
prefetched_bytes = proxy->req_hdr.len - proxy->prefetch_req_orig_len;
|
||||
|
||||
debug( "Prefetched %d bytes", prefetched_bytes );
|
||||
debug( "Prefetched additional %d bytes", prefetched_bytes );
|
||||
memcpy(
|
||||
proxy->rsp.buf + proxy->prefetch_req_orig_len,
|
||||
&(proxy->prefetch->buffer),
|
||||
proxy->prefetch->buffer,
|
||||
proxy->rsp.buf + proxy->prefetch_req_orig_len + NBD_REPLY_SIZE,
|
||||
prefetched_bytes
|
||||
);
|
||||
|
||||
@@ -382,13 +412,12 @@ int proxy_prefetch_for_reply( struct proxier* proxy, int state )
|
||||
proxy->rsp.size -= prefetched_bytes;
|
||||
|
||||
/* And we need to reset these */
|
||||
proxy->prefetch->is_full = 1;
|
||||
prefetch_set_is_full( proxy->prefetch );
|
||||
proxy->is_prefetch_req = 0;
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
int proxy_read_from_downstream( struct proxier *proxy, int state )
|
||||
@@ -415,15 +444,18 @@ int proxy_read_from_downstream( struct proxier *proxy, int state )
|
||||
return EXIT;
|
||||
}
|
||||
|
||||
/* Simple validations */
|
||||
/* Simple validations -- the request / reply size have already
|
||||
* been taken into account in the xmalloc, so no need to worry
|
||||
* about them here
|
||||
*/
|
||||
if ( ( request->type & REQUEST_MASK ) == REQUEST_READ ) {
|
||||
if (request->len > ( NBD_MAX_SIZE - NBD_REPLY_SIZE ) ) {
|
||||
if ( request->len > NBD_MAX_SIZE ) {
|
||||
warn( "NBD read request size %"PRIu32" too large", request->len );
|
||||
return EXIT;
|
||||
}
|
||||
}
|
||||
if ( (request->type & REQUEST_MASK ) == REQUEST_WRITE ) {
|
||||
if (request->len > ( NBD_MAX_SIZE - NBD_REQUEST_SIZE ) ) {
|
||||
if ( request->len > NBD_MAX_SIZE ) {
|
||||
warn( "NBD write request size %"PRIu32" too large", request->len );
|
||||
return EXIT;
|
||||
}
|
||||
@@ -469,10 +501,8 @@ int proxy_continue_connecting_to_upstream( struct proxier* proxy, int state )
|
||||
return state;
|
||||
}
|
||||
|
||||
#ifdef PREFETCH
|
||||
/* Data may have changed while we were disconnected */
|
||||
proxy->prefetch->is_full = 0;
|
||||
#endif
|
||||
prefetch_set_is_empty( proxy->prefetch );
|
||||
|
||||
info( "Connected to upstream on fd %i", proxy->upstream_fd );
|
||||
return READ_INIT_FROM_UPSTREAM;
|
||||
@@ -492,7 +522,7 @@ int proxy_read_init_from_upstream( struct proxier* proxy, int state )
|
||||
}
|
||||
|
||||
if ( proxy->init.needle == proxy->init.size ) {
|
||||
off64_t upstream_size;
|
||||
uint64_t upstream_size;
|
||||
if ( !nbd_check_hello( (struct nbd_init_raw*) proxy->init.buf, &upstream_size ) ) {
|
||||
warn( "Upstream sent invalid init" );
|
||||
goto disconnect;
|
||||
@@ -518,11 +548,22 @@ int proxy_write_to_upstream( struct proxier* proxy, int state )
|
||||
ssize_t count;
|
||||
|
||||
// assert( state == WRITE_TO_UPSTREAM );
|
||||
|
||||
/* FIXME: We may set cork=1 multiple times as a result of this idiom.
|
||||
* Not a serious problem, but we could do better
|
||||
*/
|
||||
if ( proxy->req.needle == 0 && AF_UNIX != proxy->connect_to.family ) {
|
||||
if ( sock_set_tcp_cork( proxy->upstream_fd, 1 ) == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to set TCP_CORK" ) );
|
||||
}
|
||||
}
|
||||
|
||||
count = iobuf_write( proxy->upstream_fd, &proxy->req );
|
||||
|
||||
if ( count == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to send request to upstream" ) );
|
||||
proxy->req.needle = 0;
|
||||
// We're throwing the socket away so no need to uncork
|
||||
return CONNECT_TO_UPSTREAM;
|
||||
}
|
||||
|
||||
@@ -531,6 +572,14 @@ int proxy_write_to_upstream( struct proxier* proxy, int state )
|
||||
* still need req.size if reading the reply fails - we disconnect
|
||||
* and resend the reply in that case - so keep it around for now. */
|
||||
proxy->req.needle = 0;
|
||||
|
||||
if ( AF_UNIX != proxy->connect_to.family ) {
|
||||
if ( sock_set_tcp_cork( proxy->upstream_fd, 0 ) == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to unset TCP_CORK" ) );
|
||||
// TODO: should we return to CONNECT_TO_UPSTREAM in this instance?
|
||||
}
|
||||
}
|
||||
|
||||
return READ_FROM_UPSTREAM;
|
||||
}
|
||||
|
||||
@@ -670,7 +719,7 @@ void proxy_session( struct proxier* proxy )
|
||||
state_started = monotonic_time_ms();
|
||||
|
||||
debug(
|
||||
"State transitition from %s to %s",
|
||||
"State transition from %s to %s",
|
||||
proxy_session_state_names[old_state],
|
||||
proxy_session_state_names[state]
|
||||
);
|
||||
@@ -736,14 +785,12 @@ void proxy_session( struct proxier* proxy )
|
||||
case READ_FROM_DOWNSTREAM:
|
||||
if ( FD_ISSET( proxy->downstream_fd, &rfds ) ) {
|
||||
state = proxy_read_from_downstream( proxy, state );
|
||||
#ifdef PREFETCH
|
||||
/* Check if we can fulfil the request from prefetch, or
|
||||
* rewrite the request to fill the prefetch buffer if needed
|
||||
*/
|
||||
if ( state == WRITE_TO_UPSTREAM ) {
|
||||
if ( proxy_prefetches( proxy ) && state == WRITE_TO_UPSTREAM ) {
|
||||
state = proxy_prefetch_for_request( proxy, state );
|
||||
}
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case CONNECT_TO_UPSTREAM:
|
||||
@@ -774,12 +821,10 @@ void proxy_session( struct proxier* proxy )
|
||||
if ( FD_ISSET( proxy->upstream_fd, &rfds ) ) {
|
||||
state = proxy_read_from_upstream( proxy, state );
|
||||
}
|
||||
# ifdef PREFETCH
|
||||
/* Fill the prefetch buffer and rewrite the reply, if needed */
|
||||
if ( state == WRITE_TO_DOWNSTREAM ) {
|
||||
if ( proxy_prefetches( proxy ) && state == WRITE_TO_DOWNSTREAM ) {
|
||||
state = proxy_prefetch_for_reply( proxy, state );
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
case WRITE_TO_DOWNSTREAM:
|
||||
if ( FD_ISSET( proxy->downstream_fd, &wfds ) ) {
|
||||
@@ -797,6 +842,13 @@ void proxy_session( struct proxier* proxy )
|
||||
proxy_session_state_names[state]
|
||||
);
|
||||
state = CONNECT_TO_UPSTREAM;
|
||||
|
||||
/* Since we've timed out, we won't have gone through the timeout logic
|
||||
* in the various state handlers that resets these appropriately... */
|
||||
proxy->init.size = 0;
|
||||
proxy->init.needle = 0;
|
||||
proxy->rsp.size = 0;
|
||||
proxy->rsp.needle = 0;
|
||||
}
|
||||
}
|
||||
}
|
@@ -5,7 +5,6 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#include "ioutil.h"
|
||||
#include "flexnbd.h"
|
||||
#include "parse.h"
|
||||
#include "nbdtypes.h"
|
||||
#include "self_pipe.h"
|
||||
@@ -21,9 +20,6 @@
|
||||
#define UPSTREAM_TIMEOUT 30 * 1000
|
||||
|
||||
struct proxier {
|
||||
/* The flexnbd wrapper this proxier is attached to */
|
||||
struct flexnbd* flexnbd;
|
||||
|
||||
/** address/port to bind to */
|
||||
union mysockaddr listen_on;
|
||||
|
||||
@@ -48,7 +44,7 @@ struct proxier {
|
||||
int upstream_fd;
|
||||
|
||||
/* This is the size we advertise to the downstream server */
|
||||
off64_t upstream_size;
|
||||
uint64_t upstream_size;
|
||||
|
||||
/* We transform the raw request header into here */
|
||||
struct nbd_request req_hdr;
|
||||
@@ -73,7 +69,8 @@ struct proxier {
|
||||
uint64_t req_count;
|
||||
int hello_sent;
|
||||
|
||||
#ifdef PREFETCH
|
||||
/** These are only used if we pass --cache on the command line */
|
||||
|
||||
/* While the in-flight request has been munged by prefetch, these two are
|
||||
* set to true, and the original length of the request, respectively */
|
||||
int is_prefetch_req;
|
||||
@@ -81,7 +78,8 @@ struct proxier {
|
||||
|
||||
/* And here, we actually store the prefetched data once it's returned */
|
||||
struct prefetch *prefetch;
|
||||
#endif
|
||||
|
||||
/** */
|
||||
};
|
||||
|
||||
struct proxier* proxy_create(
|
||||
@@ -89,7 +87,8 @@ struct proxier* proxy_create(
|
||||
char* s_downstream_port,
|
||||
char* s_upstream_address,
|
||||
char* s_upstream_port,
|
||||
char* s_upstream_bind );
|
||||
char* s_upstream_bind,
|
||||
char* s_cache_bytes);
|
||||
int do_proxy( struct proxier* proxy );
|
||||
void proxy_cleanup( struct proxier* proxy );
|
||||
void proxy_destroy( struct proxier* proxy );
|
@@ -1,23 +0,0 @@
|
||||
#ifndef READWRITE_H
|
||||
|
||||
#define READWRITE_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include "nbdtypes.h"
|
||||
|
||||
int socket_connect(struct sockaddr* to, struct sockaddr* from);
|
||||
int socket_nbd_read_hello(int fd, off64_t * size);
|
||||
int socket_nbd_write_hello(int fd, off64_t size);
|
||||
void socket_nbd_read(int fd, off64_t from, int len, int out_fd, void* out_buf, int timeout_secs);
|
||||
void socket_nbd_write(int fd, off64_t from, int len, int out_fd, void* out_buf, int timeout_secs);
|
||||
int socket_nbd_disconnect( int fd );
|
||||
|
||||
/* as you can see, we're slowly accumulating code that should really be in an
|
||||
* NBD library */
|
||||
|
||||
void nbd_hello_to_buf( struct nbd_init_raw* buf, off64_t out_size );
|
||||
int nbd_check_hello( struct nbd_init_raw* init_raw, off64_t* out_size );
|
||||
|
||||
#endif
|
||||
|
@@ -31,7 +31,7 @@ static int is_included_in_acl(int list_length, struct ip_and_mask (*list)[], uni
|
||||
for (i=0; i < list_length; i++) {
|
||||
struct ip_and_mask *entry = &(*list)[i];
|
||||
int testbits;
|
||||
unsigned char *raw_address1, *raw_address2;
|
||||
unsigned char *raw_address1 = NULL, *raw_address2 = NULL;
|
||||
|
||||
debug("checking acl entry %d (%d/%d)", i, test->generic.sa_family, entry->ip.family);
|
||||
|
@@ -7,43 +7,64 @@
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
|
||||
/*
|
||||
* Make the bitfield words 'opaque' to prevent code
|
||||
* poking at the bits directly without using these
|
||||
* accessors/macros
|
||||
*/
|
||||
typedef uint64_t bitfield_word_t;
|
||||
typedef bitfield_word_t * bitfield_p;
|
||||
|
||||
static inline char char_with_bit_set(uint64_t num) { return 1<<(num%8); }
|
||||
/* Size of one bitfield word, in bytes */
#define BITFIELD_WORD_SIZE (sizeof(bitfield_word_t))
/* Number of bits stored in one bitfield word */
#define BITS_PER_WORD (BITFIELD_WORD_SIZE * 8)

/* Mask with only the bit for index _idx set, positioned within its word.
 * The constant is of the (unsigned) word type: shifting a signed 1 into
 * the top bit of the word would be undefined behaviour.
 */
#define BIT_MASK(_idx) \
	((bitfield_word_t)1 << ((_idx) & (BITS_PER_WORD - 1)))
/* The word of bitfield _b that contains bit _idx; usable as an lvalue */
#define BIT_WORD(_b, _idx) \
	(((bitfield_word_t*)(_b))[(_idx) / BITS_PER_WORD])

/* Calculates the number of words needed to store _bytes number of bytes
 * this is added to accommodate code that wants to use bytes sizes
 */
#define BIT_WORDS_FOR_SIZE(_bytes) \
	(((_bytes) + (BITFIELD_WORD_SIZE-1)) / BITFIELD_WORD_SIZE)
|
||||
|
||||
/** Return the bit value ''idx'' in array ''b'' */
|
||||
static inline int bit_get(bitfield_p b, uint64_t idx) {
|
||||
return (BIT_WORD(b, idx) >> (idx & (BITS_PER_WORD-1))) & 1;
|
||||
}
|
||||
|
||||
/** Return 1 if the bit at ''idx'' in array ''b'' is set */
|
||||
static inline int bit_is_set(char* b, uint64_t idx) {
|
||||
return (b[idx/8] & char_with_bit_set(idx)) != 0;
|
||||
static inline int bit_is_set(bitfield_p b, uint64_t idx) {
|
||||
return bit_get(b, idx);
|
||||
}
|
||||
/** Return 1 if the bit at ''idx'' in array ''b'' is clear */
|
||||
static inline int bit_is_clear(char* b, uint64_t idx) {
|
||||
return !bit_is_set(b, idx);
|
||||
static inline int bit_is_clear(bitfield_p b, uint64_t idx) {
|
||||
return !bit_get(b, idx);
|
||||
}
|
||||
/** Tests whether the bit at ''idx'' in array ''b'' has value ''value'' */
|
||||
static inline int bit_has_value(char* b, uint64_t idx, int value) {
|
||||
if (value) { return bit_is_set(b, idx); }
|
||||
else { return bit_is_clear(b, idx); }
|
||||
static inline int bit_has_value(bitfield_p b, uint64_t idx, int value) {
|
||||
return bit_get(b, idx) == !!value;
|
||||
}
|
||||
/** Sets the bit ''idx'' in array ''b'' */
|
||||
static inline void bit_set(char* b, uint64_t idx) {
|
||||
b[idx/8] |= char_with_bit_set(idx);
|
||||
//__sync_fetch_and_or(b+(idx/8), char_with_bit_set(idx));
|
||||
static inline void bit_set(bitfield_p b, uint64_t idx) {
|
||||
BIT_WORD(b, idx) |= BIT_MASK(idx);
|
||||
}
|
||||
/** Clears the bit ''idx'' in array ''b'' */
|
||||
static inline void bit_clear(char* b, uint64_t idx) {
|
||||
b[idx/8] &= ~char_with_bit_set(idx);
|
||||
//__sync_fetch_and_nand(b+(idx/8), char_with_bit_set(idx));
|
||||
static inline void bit_clear(bitfield_p b, uint64_t idx) {
|
||||
BIT_WORD(b, idx) &= ~BIT_MASK(idx);
|
||||
}
|
||||
/** Sets ''len'' bits in array ''b'' starting at offset ''from'' */
|
||||
static inline void bit_set_range(char* b, uint64_t from, uint64_t len)
|
||||
static inline void bit_set_range(bitfield_p b, uint64_t from, uint64_t len)
|
||||
{
|
||||
for ( ; from%8 != 0 && len > 0 ; len-- ) {
|
||||
for ( ; (from % BITS_PER_WORD) != 0 && len > 0 ; len-- ) {
|
||||
bit_set( b, from++ );
|
||||
}
|
||||
|
||||
if (len >= 8) {
|
||||
memset(b+(from/8), 255, len/8 );
|
||||
if (len >= BITS_PER_WORD) {
|
||||
memset(&BIT_WORD(b, from), 0xff, len / 8 );
|
||||
from += len;
|
||||
len = (len%8);
|
||||
len = len % BITS_PER_WORD;
|
||||
from -= len;
|
||||
}
|
||||
|
||||
@@ -52,16 +73,16 @@ static inline void bit_set_range(char* b, uint64_t from, uint64_t len)
|
||||
}
|
||||
}
|
||||
/** Clears ''len'' bits in array ''b'' starting at offset ''from'' */
|
||||
static inline void bit_clear_range(char* b, uint64_t from, uint64_t len)
|
||||
static inline void bit_clear_range(bitfield_p b, uint64_t from, uint64_t len)
|
||||
{
|
||||
for ( ; from%8 != 0 && len > 0 ; len-- ) {
|
||||
for ( ; (from % BITS_PER_WORD) != 0 && len > 0 ; len-- ) {
|
||||
bit_clear( b, from++ );
|
||||
}
|
||||
|
||||
if (len >= 8) {
|
||||
memset(b+(from/8), 0, len/8 );
|
||||
if (len >= BITS_PER_WORD) {
|
||||
memset(&BIT_WORD(b, from), 0, len / 8 );
|
||||
from += len;
|
||||
len = (len%8);
|
||||
len = len % BITS_PER_WORD;
|
||||
from -= len;
|
||||
}
|
||||
|
||||
@@ -75,34 +96,33 @@ static inline void bit_clear_range(char* b, uint64_t from, uint64_t len)
|
||||
* bits that are the same as the first one specified. If ''run_is_set'' is
|
||||
* non-NULL, the value of that bit is placed into it.
|
||||
*/
|
||||
static inline uint64_t bit_run_count(char* b, uint64_t from, uint64_t len, int *run_is_set) {
|
||||
uint64_t* current_block;
|
||||
static inline uint64_t bit_run_count(bitfield_p b, uint64_t from, uint64_t len, int *run_is_set) {
|
||||
uint64_t count = 0;
|
||||
int first_value = bit_is_set(b, from);
|
||||
int first_value = bit_get(b, from);
|
||||
bitfield_word_t word_match = first_value ? -1 : 0;
|
||||
|
||||
if ( run_is_set != NULL ) {
|
||||
*run_is_set = first_value;
|
||||
}
|
||||
|
||||
for ( ; (from+count) % 64 != 0 && len > 0; len--) {
|
||||
if (bit_has_value(b, from+count, first_value)) {
|
||||
for ( ; ((from + count) % BITS_PER_WORD) != 0 && len > 0; len--) {
|
||||
if (bit_has_value(b, from + count, first_value)) {
|
||||
count++;
|
||||
} else {
|
||||
return count;
|
||||
}
|
||||
}
|
||||
|
||||
for ( ; len >= 64 ; len -= 64 ) {
|
||||
current_block = (uint64_t*) (b + ((from+count)/8));
|
||||
if (*current_block == ( first_value ? UINT64_MAX : 0 ) ) {
|
||||
count += 64;
|
||||
for ( ; len >= BITS_PER_WORD ; len -= BITS_PER_WORD ) {
|
||||
if (BIT_WORD(b, from + count) == word_match) {
|
||||
count += BITS_PER_WORD;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for ( ; len > 0; len-- ) {
|
||||
if ( bit_has_value(b, from+count, first_value) ) {
|
||||
if ( bit_has_value(b, from + count, first_value) ) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
@@ -116,6 +136,7 @@ enum bitset_stream_events {
|
||||
BITSET_STREAM_ON = 2,
|
||||
BITSET_STREAM_OFF = 3
|
||||
};
|
||||
#define BITSET_STREAM_EVENTS_ENUM_SIZE 4
|
||||
|
||||
struct bitset_stream_entry {
|
||||
enum bitset_stream_events event;
|
||||
@@ -138,6 +159,7 @@ struct bitset_stream {
|
||||
pthread_mutex_t mutex;
|
||||
pthread_cond_t cond_not_full;
|
||||
pthread_cond_t cond_not_empty;
|
||||
uint64_t queued_bytes[BITSET_STREAM_EVENTS_ENUM_SIZE];
|
||||
};
|
||||
|
||||
|
||||
@@ -152,7 +174,7 @@ struct bitset {
|
||||
int resolution;
|
||||
struct bitset_stream *stream;
|
||||
int stream_enabled;
|
||||
char bits[];
|
||||
bitfield_word_t bits[];
|
||||
};
|
||||
|
||||
/** Allocate a bitset for a file of the given size, and chunks of the
|
||||
@@ -160,9 +182,12 @@ struct bitset {
|
||||
*/
|
||||
static inline struct bitset *bitset_alloc( uint64_t size, int resolution )
|
||||
{
|
||||
struct bitset *bitset = xmalloc(
|
||||
sizeof( struct bitset ) + ( size + resolution - 1 ) / resolution
|
||||
);
|
||||
// calculate a size to allocate that is a multiple of the size of the
|
||||
// bitfield word
|
||||
size_t bitfield_size =
|
||||
BIT_WORDS_FOR_SIZE((( size + resolution - 1 ) / resolution)) * sizeof( bitfield_word_t );
|
||||
struct bitset *bitset = xmalloc(sizeof( struct bitset ) + ( bitfield_size / 8 ) );
|
||||
|
||||
bitset->size = size;
|
||||
bitset->resolution = resolution;
|
||||
/* don't actually need to call pthread_mutex_destroy '*/
|
||||
@@ -217,13 +242,14 @@ static inline void bitset_stream_enqueue(
|
||||
stream->entries[stream->in].event = event;
|
||||
stream->entries[stream->in].from = from;
|
||||
stream->entries[stream->in].len = len;
|
||||
stream->queued_bytes[event] += len;
|
||||
|
||||
stream->size++;
|
||||
stream->in++;
|
||||
stream->in %= BITSET_STREAM_SIZE;
|
||||
|
||||
pthread_mutex_unlock( & stream->mutex );
|
||||
pthread_cond_broadcast( &stream->cond_not_empty );
|
||||
pthread_cond_signal( &stream->cond_not_empty );
|
||||
|
||||
return;
|
||||
}
|
||||
@@ -234,6 +260,7 @@ static inline void bitset_stream_dequeue(
|
||||
)
|
||||
{
|
||||
struct bitset_stream * stream = set->stream;
|
||||
struct bitset_stream_entry * dequeued;
|
||||
|
||||
pthread_mutex_lock( &stream->mutex );
|
||||
|
||||
@@ -241,18 +268,21 @@ static inline void bitset_stream_dequeue(
|
||||
pthread_cond_wait( &stream->cond_not_empty, &stream->mutex );
|
||||
}
|
||||
|
||||
dequeued = &stream->entries[stream->out];
|
||||
|
||||
if ( out != NULL ) {
|
||||
out->event = stream->entries[stream->out].event;
|
||||
out->from = stream->entries[stream->out].from;
|
||||
out->len = stream->entries[stream->out].len;
|
||||
out->event = dequeued->event;
|
||||
out->from = dequeued->from;
|
||||
out->len = dequeued->len;
|
||||
}
|
||||
|
||||
stream->queued_bytes[dequeued->event] -= dequeued->len;
|
||||
stream->size--;
|
||||
stream->out++;
|
||||
stream->out %= BITSET_STREAM_SIZE;
|
||||
|
||||
pthread_mutex_unlock( &stream->mutex );
|
||||
pthread_cond_broadcast( &stream->cond_not_full );
|
||||
pthread_cond_signal( &stream->cond_not_full );
|
||||
|
||||
return;
|
||||
}
|
||||
@@ -273,17 +303,10 @@ static inline uint64_t bitset_stream_queued_bytes(
|
||||
enum bitset_stream_events event
|
||||
)
|
||||
{
|
||||
uint64_t total = 0;
|
||||
int i;
|
||||
uint64_t total;
|
||||
|
||||
pthread_mutex_lock( &set->stream->mutex );
|
||||
|
||||
for ( i = set->stream->out; i < set->stream->in ; i++ ) {
|
||||
if ( set->stream->entries[i].event == event ) {
|
||||
total += set->stream->entries[i].len;
|
||||
}
|
||||
}
|
||||
|
||||
total = set->stream->queued_bytes[event];
|
||||
pthread_mutex_unlock( &set->stream->mutex );
|
||||
|
||||
return total;
|
@@ -15,6 +15,20 @@
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
|
||||
// When this signal is invoked, we call shutdown() on the client fd, which
|
||||
// results in the thread being wound up
|
||||
void client_killswitch_hit(int signal __attribute__ ((unused)), siginfo_t *info, void *ptr __attribute__ ((unused)))
|
||||
{
|
||||
int fd = info->si_value.sival_int;
|
||||
warn( "Killswitch for fd %i activated, calling shutdown on socket", fd );
|
||||
|
||||
FATAL_IF(
|
||||
-1 == shutdown( fd, SHUT_RDWR ),
|
||||
SHOW_ERRNO( "Failed to shutdown() the socket, killing the server" )
|
||||
);
|
||||
}
|
||||
|
||||
struct client *client_create( struct server *serve, int socket )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
@@ -25,6 +39,13 @@ struct client *client_create( struct server *serve, int socket )
|
||||
.sigev_signo = CLIENT_KILLSWITCH_SIGNAL
|
||||
};
|
||||
|
||||
/*
|
||||
* Our killswitch closes this socket, forcing read() and write() calls
|
||||
* blocked on it to return with an error. The thread then close()s the
|
||||
* socket itself, avoiding races.
|
||||
*/
|
||||
evp.sigev_value.sival_int = socket;
|
||||
|
||||
c = xmalloc( sizeof( struct client ) );
|
||||
c->stopped = 0;
|
||||
c->socket = socket;
|
||||
@@ -105,7 +126,9 @@ void write_not_zeroes(struct client* client, uint64_t from, uint64_t len)
|
||||
debug("(run adjusted to %d)", run);
|
||||
}
|
||||
|
||||
if (0) /* useful but expensive */
|
||||
/*
|
||||
// Useful but expensive
|
||||
if (0)
|
||||
{
|
||||
uint64_t i;
|
||||
fprintf(stderr, "full map resolution=%d: ", map->resolution);
|
||||
@@ -118,6 +141,7 @@ void write_not_zeroes(struct client* client, uint64_t from, uint64_t len)
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
*/
|
||||
|
||||
#define DO_READ(dst, len) ERROR_IF_NEGATIVE( \
|
||||
readloop( \
|
||||
@@ -199,36 +223,6 @@ int client_read_request( struct client * client , struct nbd_request *out_reques
|
||||
NULLCHECK( out_request );
|
||||
|
||||
struct nbd_request_raw request_raw;
|
||||
fd_set fds;
|
||||
struct timeval * ptv = NULL;
|
||||
int fd_count;
|
||||
|
||||
/* We want a timeout if this is an inbound migration, but not otherwise.
|
||||
* This is compile-time selectable, as it will break mirror max_bps
|
||||
*/
|
||||
#ifdef HAS_LISTEN_TIMEOUT
|
||||
struct timeval tv = {CLIENT_MAX_WAIT_SECS, 0};
|
||||
|
||||
if ( !server_is_in_control( client->serve ) ) {
|
||||
ptv = &tv;
|
||||
}
|
||||
#endif
|
||||
|
||||
FD_ZERO(&fds);
|
||||
FD_SET(client->socket, &fds);
|
||||
self_pipe_fd_set( client->stop_signal, &fds );
|
||||
fd_count = sock_try_select(FD_SETSIZE, &fds, NULL, NULL, ptv);
|
||||
if ( fd_count == 0 ) {
|
||||
/* This "can't ever happen" */
|
||||
if ( NULL == ptv ) { fatal( "No FDs selected, and no timeout!" ); }
|
||||
else { error("Timed out waiting for I/O"); }
|
||||
}
|
||||
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
|
||||
|
||||
if ( self_pipe_fd_isset( client->stop_signal, &fds ) ){
|
||||
debug("Client received stop signal.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (fd_read_request(client->socket, &request_raw) == -1) {
|
||||
*disconnected = 1;
|
||||
@@ -255,21 +249,20 @@ int client_read_request( struct client * client , struct nbd_request *out_reques
|
||||
}
|
||||
|
||||
nbd_r2h_request( &request_raw, out_request );
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int fd_write_reply( int fd, char *handle, int error )
|
||||
int fd_write_reply( int fd, uint64_t handle, int error )
|
||||
{
|
||||
struct nbd_reply reply;
|
||||
struct nbd_reply_raw reply_raw;
|
||||
|
||||
reply.magic = REPLY_MAGIC;
|
||||
reply.error = error;
|
||||
memcpy( reply.handle, handle, 8 );
|
||||
reply.handle.w = handle;
|
||||
|
||||
nbd_h2r_reply( &reply, &reply_raw );
|
||||
debug( "Replying with %s, %d", handle, error );
|
||||
debug( "Replying with handle=0x%08X, error=%"PRIu32, handle, error );
|
||||
|
||||
if( -1 == writeloop( fd, &reply_raw, sizeof( reply_raw ) ) ) {
|
||||
switch( errno ) {
|
||||
@@ -298,7 +291,7 @@ int fd_write_reply( int fd, char *handle, int error )
|
||||
*/
|
||||
int client_write_reply( struct client * client, struct nbd_request *request, int error )
|
||||
{
|
||||
return fd_write_reply( client->socket, request->handle, error);
|
||||
return fd_write_reply( client->socket, request->handle.w, error);
|
||||
}
|
||||
|
||||
|
||||
@@ -307,7 +300,7 @@ void client_write_init( struct client * client, uint64_t size )
|
||||
struct nbd_init init = {{0}};
|
||||
struct nbd_init_raw init_raw = {{0}};
|
||||
|
||||
memcpy( init.passwd, INIT_PASSWD, sizeof( INIT_PASSWD ) );
|
||||
memcpy( init.passwd, INIT_PASSWD, sizeof( init.passwd ) );
|
||||
init.magic = INIT_MAGIC;
|
||||
init.size = size;
|
||||
memset( init.reserved, 0, 128 );
|
||||
@@ -379,15 +372,15 @@ int client_request_needs_reply( struct client * client,
|
||||
* forever.
|
||||
*/
|
||||
if (request.magic != REQUEST_MAGIC) {
|
||||
warn("Bad magic 0x%08x from client", request.magic);
|
||||
warn("Bad magic 0x%08X from client", request.magic);
|
||||
client_write_reply( client, &request, EBADMSG );
|
||||
client->disconnect = 1; // no need to flush
|
||||
return 0;
|
||||
}
|
||||
|
||||
debug(
|
||||
"request type=%"PRIu32", from=%"PRIu64", len=%"PRIu32,
|
||||
request.type, request.from, request.len
|
||||
"request type=%"PRIu32", from=%"PRIu64", len=%"PRIu32", handle=0x%08X",
|
||||
request.type, request.from, request.len, request.handle
|
||||
);
|
||||
|
||||
/* check it's not out of range */
|
||||
@@ -416,7 +409,7 @@ int client_request_needs_reply( struct client * client,
|
||||
return 0;
|
||||
|
||||
default:
|
||||
fatal("Unknown request %08x", request.type);
|
||||
fatal("Unknown request 0x%08X", request.type);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
@@ -427,7 +420,8 @@ void client_reply_to_read( struct client* client, struct nbd_request request )
|
||||
off64_t offset;
|
||||
|
||||
debug("request read %ld+%d", request.from, request.len);
|
||||
client_write_reply( client, &request, 0);
|
||||
sock_set_tcp_cork( client->socket, 1 );
|
||||
client_write_reply( client, &request, 0 );
|
||||
|
||||
offset = request.from;
|
||||
|
||||
@@ -443,12 +437,14 @@ void client_reply_to_read( struct client* client, struct nbd_request request )
|
||||
"sendfile failed from=%ld, len=%d",
|
||||
offset,
|
||||
request.len);
|
||||
|
||||
sock_set_tcp_cork( client->socket, 0 );
|
||||
}
|
||||
|
||||
|
||||
void client_reply_to_write( struct client* client, struct nbd_request request )
|
||||
{
|
||||
debug("request write %ld+%d", request.from, request.len);
|
||||
debug("request write from=%"PRIu64", len=%"PRIu32", handle=0x%08X", request.from, request.len, request.handle);
|
||||
if (client->serve->allocation_map_built) {
|
||||
write_not_zeroes( client, request.from, request.len );
|
||||
}
|
||||
@@ -553,35 +549,79 @@ int client_serve_request(struct client* client)
|
||||
struct nbd_request request = {0};
|
||||
int stop = 1;
|
||||
int disconnected = 0;
|
||||
fd_set rfds, efds;
|
||||
int fd_count;
|
||||
|
||||
if ( !client_read_request( client, &request, &disconnected ) ) { return stop; }
|
||||
if ( disconnected ) { return stop; }
|
||||
if ( !client_request_needs_reply( client, request ) ) {
|
||||
/* wait until there are some bytes on the fd before committing to reads
|
||||
* FIXME: this whole scheme is broken because we're using blocking reads.
|
||||
* read() can block directly after a select anyway, and it's possible that,
|
||||
* without the killswitch, we'd hang forever. With the killswitch, we just
|
||||
* hang for "a while". The Right Thing to do is to rewrite client.c to be
|
||||
* non-blocking.
|
||||
*/
|
||||
|
||||
FD_ZERO( &rfds );
|
||||
FD_SET( client->socket, &rfds );
|
||||
self_pipe_fd_set( client->stop_signal, &rfds );
|
||||
|
||||
FD_ZERO( &efds );
|
||||
FD_SET( client->socket, &efds );
|
||||
|
||||
fd_count = sock_try_select( FD_SETSIZE, &rfds, NULL, &efds, NULL );
|
||||
|
||||
if ( fd_count == 0 ) {
|
||||
/* This "can't ever happen" */
|
||||
fatal( "No FDs selected, and no timeout!" );
|
||||
}
|
||||
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
|
||||
|
||||
if ( self_pipe_fd_isset( client->stop_signal, &rfds ) ){
|
||||
debug("Client received stop signal.");
|
||||
return 1; // Don't try to serve more requests
|
||||
}
|
||||
|
||||
if ( FD_ISSET( client->socket, &efds ) ) {
|
||||
debug( "Client connection closed" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* We arm / disarm around the whole request cycle. The reason for this is
|
||||
* that the remote peer could uncleanly die at any point; if we're stuck on
|
||||
* a blocking read(), then that will hang for (almost) forever. This is bad
|
||||
* in general, makes the server respond only to kill -9, and breaks
|
||||
* outward mirroring in a most unpleasant way.
|
||||
*
|
||||
* Don't forget to disarm before exiting, no matter what!
|
||||
*
|
||||
* The replication is simple: open a connection to the flexnbd server, write
|
||||
* a single byte, and then wait.
|
||||
*
|
||||
*/
|
||||
client_arm_killswitch( client );
|
||||
|
||||
if ( !client_read_request( client, &request, &disconnected ) ) {
|
||||
client_disarm_killswitch( client );
|
||||
return stop;
|
||||
}
|
||||
if ( disconnected ) {
|
||||
client_disarm_killswitch( client );
|
||||
return stop;
|
||||
}
|
||||
|
||||
if ( !client_request_needs_reply( client, request ) ) {
|
||||
client_disarm_killswitch( client );
|
||||
return client->disconnect;
|
||||
}
|
||||
|
||||
{
|
||||
if ( !server_is_closed( client->serve ) ) {
|
||||
/* We arm / disarm around client_reply() to catch cases where the
|
||||
* remote peer sends part of a write request data before dying,
|
||||
* and cases where we send part of read reply data before they die.
|
||||
*
|
||||
* That last is theoretical right now, but could break us in the
|
||||
* same way as a half-write (which causes us to sit in read forever)
|
||||
*
|
||||
* We only arm/disarm inside the server io lock because it's common
|
||||
* during migrations for us to be hanging on that mutex for quite
|
||||
* a while while the final pass happens - it's held for the entire
|
||||
* time.
|
||||
*/
|
||||
client_arm_killswitch( client );
|
||||
client_reply( client, request );
|
||||
client_disarm_killswitch( client );
|
||||
stop = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
client_disarm_killswitch( client );
|
||||
return stop;
|
||||
}
|
||||
|
||||
@@ -596,6 +636,9 @@ void client_cleanup(struct client* client,
|
||||
{
|
||||
info("client cleanup for client %p", client);
|
||||
|
||||
/* If the thread hits an error, we need to ensure this is off */
|
||||
client_disarm_killswitch( client );
|
||||
|
||||
if (client->socket) {
|
||||
FATAL_IF_NEGATIVE( close(client->socket),
|
||||
"Error closing client socket %d",
|
@@ -4,18 +4,6 @@
|
||||
#include <signal.h>
|
||||
#include <time.h>
|
||||
|
||||
#ifdef HAS_LISTEN_TIMEOUT
|
||||
|
||||
/** CLIENT_MAX_WAIT_SECS
|
||||
* This is the length of time an inbound migration will wait for a fresh
|
||||
* write before assuming the source has Gone Away. Note: it is *not*
|
||||
* the time from one write to the next, it is the gap between the end of
|
||||
* one write and the start of the next.
|
||||
*/
|
||||
#define CLIENT_MAX_WAIT_SECS 5
|
||||
|
||||
#endif
|
||||
|
||||
/** CLIENT_HANDLER_TIMEOUT
|
||||
* This is the length of time (in seconds) any request can be outstanding for.
|
||||
* If we spend longer than this in a request, the whole server is killed.
|
||||
@@ -24,8 +12,7 @@
|
||||
|
||||
/** CLIENT_KILLSWITCH_SIGNAL
|
||||
* The signal number we use to kill the server when *any* killswitch timer
|
||||
* fires. We don't actually need to install a signal handler for it, the default
|
||||
* behaviour is perfectly fine.
|
||||
* fires. The handler gets the fd of the client socket to work with.
|
||||
*/
|
||||
#define CLIENT_KILLSWITCH_SIGNAL ( SIGRTMIN + 1 )
|
||||
|
||||
@@ -58,6 +45,7 @@ struct client {
|
||||
|
||||
};
|
||||
|
||||
void client_killswitch_hit(int signal, siginfo_t *info, void *ptr);
|
||||
|
||||
void* client_serve(void* client_uncast);
|
||||
struct client * client_create( struct server * serve, int socket );
|
@@ -101,12 +101,24 @@ struct flexnbd * flexnbd_create_serving(
|
||||
max_nbd_clients,
|
||||
use_killswitch,
|
||||
1);
|
||||
flexnbd_create_shared( flexnbd,
|
||||
s_ctrl_sock );
|
||||
flexnbd_create_shared( flexnbd, s_ctrl_sock );
|
||||
|
||||
// Beats installing one handler per client instance
|
||||
if ( use_killswitch ) {
|
||||
struct sigaction act = {
|
||||
.sa_sigaction = client_killswitch_hit,
|
||||
.sa_flags = SA_RESTART | SA_SIGINFO
|
||||
};
|
||||
|
||||
FATAL_UNLESS(
|
||||
0 == sigaction( CLIENT_KILLSWITCH_SIGNAL, &act, NULL ),
|
||||
"Installing client killswitch signal failed"
|
||||
);
|
||||
}
|
||||
|
||||
return flexnbd;
|
||||
}
|
||||
|
||||
|
||||
struct flexnbd * flexnbd_create_listening(
|
||||
char* s_ip_address,
|
||||
char* s_port,
|
||||
@@ -127,6 +139,10 @@ struct flexnbd * flexnbd_create_listening(
|
||||
s_acl_entries,
|
||||
1, 0, 0);
|
||||
flexnbd_create_shared( flexnbd, s_ctrl_sock );
|
||||
|
||||
// listen can't use killswitch, as mirror may pause on sending things
|
||||
// for a very long time.
|
||||
|
||||
return flexnbd;
|
||||
}
|
||||
|
@@ -5,6 +5,7 @@
|
||||
#include "mirror.h"
|
||||
#include "serve.h"
|
||||
#include "proxy.h"
|
||||
#include "client.h"
|
||||
#include "self_pipe.h"
|
||||
#include "mbox.h"
|
||||
#include "control.h"
|
@@ -70,6 +70,7 @@ struct mirror_ctrl {
|
||||
|
||||
/* libev stuff */
|
||||
struct ev_loop *ev_loop;
|
||||
ev_timer begin_watcher;
|
||||
ev_io read_watcher;
|
||||
ev_io write_watcher;
|
||||
ev_timer timeout_watcher;
|
||||
@@ -138,7 +139,7 @@ enum mirror_state mirror_get_state( struct mirror * mirror )
|
||||
void mirror_init( struct mirror * mirror, const char * filename )
|
||||
{
|
||||
int map_fd;
|
||||
off64_t size;
|
||||
uint64_t size;
|
||||
|
||||
NULLCHECK( mirror );
|
||||
NULLCHECK( filename );
|
||||
@@ -213,18 +214,6 @@ void mirror_destroy( struct mirror *mirror )
|
||||
/** The mirror code will split NBD writes, making them this long as a maximum */
|
||||
static const int mirror_longest_write = 8<<20;
|
||||
|
||||
/** If, during a mirror pass, we have sent this number of bytes or fewer, we
|
||||
* go to freeze the I/O and finish it off. This is just a guess.
|
||||
*/
|
||||
static const unsigned int mirror_last_pass_after_bytes_written = 100<<20;
|
||||
|
||||
/** The largest number of full passes we'll do - the last one will always
|
||||
* cause the I/O to freeze, however many bytes are left to copy.
|
||||
*/
|
||||
static const int mirror_maximum_passes = 7;
|
||||
#define mirror_last_pass (mirror_maximum_passes - 1)
|
||||
|
||||
|
||||
/* This must not be called if there's any chance of further I/O. Methods to
|
||||
* ensure this include:
|
||||
* - Ensure image size is 0
|
||||
@@ -281,7 +270,7 @@ void mirror_cleanup( struct server * serve,
|
||||
}
|
||||
|
||||
|
||||
int mirror_connect( struct mirror * mirror, off64_t local_size )
|
||||
int mirror_connect( struct mirror * mirror, uint64_t local_size )
|
||||
{
|
||||
struct sockaddr * connect_from = NULL;
|
||||
int connected = 0;
|
||||
@@ -303,7 +292,7 @@ int mirror_connect( struct mirror * mirror, off64_t local_size )
|
||||
"Select failed." );
|
||||
|
||||
if( FD_ISSET( mirror->client, &fds ) ){
|
||||
off64_t remote_size;
|
||||
uint64_t remote_size;
|
||||
if ( socket_nbd_read_hello( mirror->client, &remote_size ) ) {
|
||||
if( remote_size == local_size ){
|
||||
connected = 1;
|
||||
@@ -347,6 +336,19 @@ int mirror_should_quit( struct mirror * mirror )
|
||||
}
|
||||
}
|
||||
|
||||
/* Bandwidth limiting - we hang around if bps is too high, unless we need to
|
||||
* empty out the bitset stream a bit */
|
||||
int mirror_should_wait( struct mirror_ctrl *ctrl )
|
||||
{
|
||||
int bps_over = server_mirror_bps( ctrl->serve ) >
|
||||
ctrl->serve->mirror->max_bytes_per_second;
|
||||
|
||||
int stream_full = bitset_stream_size( ctrl->serve->allocation_map ) >
|
||||
( BITSET_STREAM_SIZE / 2 );
|
||||
|
||||
return bps_over && !stream_full;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there's an event in the bitset stream of the serve allocation map, we
|
||||
* use it to construct the next transfer request, covering precisely the area
|
||||
@@ -369,7 +371,7 @@ int mirror_setup_next_xfer( struct mirror_ctrl *ctrl )
|
||||
* full, and stop when it's a quarter full. This stops a busy client from
|
||||
* stalling a migration forever. FIXME: made-up numbers.
|
||||
*/
|
||||
if ( bitset_stream_size( serve->allocation_map ) > BITSET_STREAM_SIZE / 2 ) {
|
||||
if ( mirror->offset < serve->size && bitset_stream_size( serve->allocation_map ) > BITSET_STREAM_SIZE / 2 ) {
|
||||
ctrl->clear_events = 1;
|
||||
}
|
||||
|
||||
@@ -410,7 +412,7 @@ int mirror_setup_next_xfer( struct mirror_ctrl *ctrl )
|
||||
struct nbd_request req = {
|
||||
.magic = REQUEST_MAGIC,
|
||||
.type = REQUEST_WRITE,
|
||||
.handle = ".MIRROR.",
|
||||
.handle.b = ".MIRROR.",
|
||||
.from = current,
|
||||
.len = run
|
||||
};
|
||||
@@ -425,24 +427,6 @@ int mirror_setup_next_xfer( struct mirror_ctrl *ctrl )
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint64_t mirror_current_bps( struct mirror * mirror )
|
||||
{
|
||||
uint64_t duration_ms = monotonic_time_ms() - mirror->migration_started;
|
||||
return mirror->all_dirty / ( ( duration_ms / 1000 ) + 1 );
|
||||
}
|
||||
|
||||
int mirror_exceeds_max_bps( struct mirror * mirror )
|
||||
{
|
||||
uint64_t mig_speed = mirror_current_bps( mirror );
|
||||
debug( "current_bps: %"PRIu64"; max_bps: %"PRIu64, mig_speed, mirror->max_bytes_per_second );
|
||||
|
||||
if ( mig_speed > mirror->max_bytes_per_second ) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// ONLY CALL THIS AFTER CLOSING CLIENTS
|
||||
void mirror_complete( struct server *serve )
|
||||
{
|
||||
@@ -478,6 +462,12 @@ static void mirror_write_cb( struct ev_loop *loop, ev_io *w, int revents )
|
||||
|
||||
debug( "Mirror write callback invoked with events %d. fd: %i", revents, ctrl->mirror->client );
|
||||
|
||||
/* FIXME: We can end up corking multiple times in unusual circumstances; this
|
||||
* is annoying, but harmless */
|
||||
if ( xfer->written == 0 ) {
|
||||
sock_set_tcp_cork( ctrl->mirror->client, 1 );
|
||||
}
|
||||
|
||||
if ( xfer->written < hdr_size ) {
|
||||
data_loc = ( (char*) &xfer->hdr.req_raw ) + ctrl->xfer.written;
|
||||
to_write = hdr_size - xfer->written;
|
||||
@@ -486,7 +476,7 @@ static void mirror_write_cb( struct ev_loop *loop, ev_io *w, int revents )
|
||||
to_write = xfer->len - ( ctrl->xfer.written - hdr_size );
|
||||
}
|
||||
|
||||
// Actually read some bytes
|
||||
// Actually write some bytes
|
||||
if ( ( count = write( ctrl->mirror->client, data_loc, to_write ) ) < 0 ) {
|
||||
if ( errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR ) {
|
||||
warn( SHOW_ERRNO( "Couldn't write to listener" ) );
|
||||
@@ -496,13 +486,16 @@ static void mirror_write_cb( struct ev_loop *loop, ev_io *w, int revents )
|
||||
}
|
||||
debug( "Wrote %"PRIu64" bytes", count );
|
||||
debug( "to_write was %"PRIu64", xfer->written was %"PRIu64, to_write, xfer->written );
|
||||
ctrl->xfer.written += count;
|
||||
|
||||
// We wrote some bytes, so reset the timer
|
||||
ev_timer_again( ctrl->ev_loop, &ctrl->timeout_watcher );
|
||||
// We wrote some bytes, so reset the timer and keep track for the next pass
|
||||
if ( count > 0 ) {
|
||||
ctrl->xfer.written += count;
|
||||
ev_timer_again( ctrl->ev_loop, &ctrl->timeout_watcher );
|
||||
}
|
||||
|
||||
// All bytes written, so now we need to read the NBD reply back.
|
||||
if ( ctrl->xfer.written == ctrl->xfer.len + hdr_size ) {
|
||||
sock_set_tcp_cork( ctrl->mirror->client, 0 ) ;
|
||||
ev_io_start( loop, &ctrl->read_watcher );
|
||||
ev_io_stop( loop, &ctrl->write_watcher );
|
||||
}
|
||||
@@ -575,7 +568,7 @@ static void mirror_read_cb( struct ev_loop *loop, ev_io *w, int revents )
|
||||
return;
|
||||
}
|
||||
|
||||
if ( memcmp( ".MIRROR.", &rsp.handle[0], 8 ) != 0 ) {
|
||||
if ( memcmp( ".MIRROR.", rsp.handle.b, 8 ) != 0 ) {
|
||||
warn( "Bad handle returned from listener" );
|
||||
ev_break( loop, EVBREAK_ONE );
|
||||
return;
|
||||
@@ -584,10 +577,17 @@ static void mirror_read_cb( struct ev_loop *loop, ev_io *w, int revents )
|
||||
/* transfer was completed, so now we need to either set up the next
|
||||
* transfer of this pass, set up the first transfer of the next pass, or
|
||||
* complete the migration */
|
||||
m->all_dirty += xfer->len;
|
||||
xfer->read = 0;
|
||||
xfer->written = 0;
|
||||
|
||||
/* We don't account for bytes written in this mode, to stop high-throughput
|
||||
* discs getting stuck in "drain the event queue!" mode forever
|
||||
*/
|
||||
if ( !ctrl->clear_events ) {
|
||||
m->all_dirty += xfer->len;
|
||||
}
|
||||
|
||||
|
||||
/* This next bit could take a little while, which is fine */
|
||||
ev_timer_stop( ctrl->ev_loop, &ctrl->timeout_watcher );
|
||||
|
||||
@@ -601,17 +601,15 @@ static void mirror_read_cb( struct ev_loop *loop, ev_io *w, int revents )
|
||||
int next_xfer = mirror_setup_next_xfer( ctrl );
|
||||
debug( "next_xfer: %d", next_xfer );
|
||||
|
||||
/* Regardless of time estimates, if there's no waiting transfer, we can
|
||||
* */
|
||||
if ( !ctrl->clients_closed && ( !next_xfer || server_mirror_eta( ctrl->serve ) < 60 ) ) {
|
||||
/* Regardless of time estimates, if there's no waiting transfer, we can start closing clients down. */
|
||||
if ( !ctrl->clients_closed && ( !next_xfer || server_mirror_eta( ctrl->serve ) < MS_CONVERGE_TIME_SECS ) ) {
|
||||
info( "Closing clients to allow mirroring to converge" );
|
||||
server_forbid_new_clients( ctrl->serve );
|
||||
server_close_clients( ctrl->serve );
|
||||
server_join_clients( ctrl->serve );
|
||||
ctrl->clients_closed = 1;
|
||||
|
||||
/* One more try - a new event may have been pushed since our last check
|
||||
*/
|
||||
/* One more try - a new event may have been pushed since our last check */
|
||||
if ( !next_xfer ) {
|
||||
next_xfer = mirror_setup_next_xfer( ctrl );
|
||||
}
|
||||
@@ -630,7 +628,7 @@ static void mirror_read_cb( struct ev_loop *loop, ev_io *w, int revents )
|
||||
|
||||
/* FIXME: Should we ignore the bwlimit after server_close_clients has been called? */
|
||||
|
||||
if ( mirror_exceeds_max_bps( m ) ) {
|
||||
if ( mirror_should_wait( ctrl ) ) {
|
||||
/* We're over the bandwidth limit, so don't move onto the next transfer
|
||||
* yet. Our limit_watcher will move us on once we're OK. timeout_watcher
|
||||
* was disabled further up, so don't need to stop it here too */
|
||||
@@ -645,7 +643,7 @@ static void mirror_read_cb( struct ev_loop *loop, ev_io *w, int revents )
|
||||
return;
|
||||
}
|
||||
|
||||
void mirror_timeout_cb( struct ev_loop *loop, ev_timer *w __attribute__((unused)), int revents )
|
||||
static void mirror_timeout_cb( struct ev_loop *loop, ev_timer *w __attribute__((unused)), int revents )
|
||||
{
|
||||
if ( !(revents & EV_TIMER ) ) {
|
||||
warn( "Mirror timeout called but no timer event signalled" );
|
||||
@@ -657,7 +655,7 @@ void mirror_timeout_cb( struct ev_loop *loop, ev_timer *w __attribute__((unused)
|
||||
return;
|
||||
}
|
||||
|
||||
void mirror_abandon_cb( struct ev_loop *loop, ev_io *w, int revents )
|
||||
static void mirror_abandon_cb( struct ev_loop *loop, ev_io *w, int revents )
|
||||
{
|
||||
struct mirror_ctrl* ctrl = (struct mirror_ctrl*) w->data;
|
||||
NULLCHECK( ctrl );
|
||||
@@ -674,7 +672,8 @@ void mirror_abandon_cb( struct ev_loop *loop, ev_io *w, int revents )
|
||||
return;
|
||||
}
|
||||
|
||||
void mirror_limit_cb( struct ev_loop *loop, ev_timer *w, int revents )
|
||||
|
||||
static void mirror_limit_cb( struct ev_loop *loop, ev_timer *w, int revents )
|
||||
{
|
||||
struct mirror_ctrl* ctrl = (struct mirror_ctrl*) w->data;
|
||||
NULLCHECK( ctrl );
|
||||
@@ -684,7 +683,7 @@ void mirror_limit_cb( struct ev_loop *loop, ev_timer *w, int revents )
|
||||
return;
|
||||
}
|
||||
|
||||
if ( mirror_exceeds_max_bps( ctrl->mirror ) ) {
|
||||
if ( mirror_should_wait( ctrl ) ) {
|
||||
debug( "max_bps exceeded, waiting", ctrl->mirror->max_bytes_per_second );
|
||||
ev_timer_again( loop, w );
|
||||
} else {
|
||||
@@ -698,6 +697,37 @@ void mirror_limit_cb( struct ev_loop *loop, ev_timer *w, int revents )
|
||||
return;
|
||||
}
|
||||
|
||||
/* We use this to periodically check whether the allocation map has built, and
|
||||
* if it has, start migrating. If it's not finished, then enabling the bitset
|
||||
* stream does not go well for us.
|
||||
*/
|
||||
static void mirror_begin_cb( struct ev_loop *loop, ev_timer *w, int revents )
|
||||
{
|
||||
struct mirror_ctrl* ctrl = (struct mirror_ctrl*) w->data;
|
||||
NULLCHECK( ctrl );
|
||||
|
||||
if ( !(revents & EV_TIMER ) ) {
|
||||
warn( "Mirror limit callback executed but no timer event signalled" );
|
||||
return;
|
||||
}
|
||||
|
||||
if ( ctrl->serve->allocation_map_built || ctrl->serve->allocation_map_not_built ) {
|
||||
info( "allocation map builder is finished, beginning migration" );
|
||||
ev_timer_stop( loop, w );
|
||||
/* Start by writing xfer 0 to the listener */
|
||||
ev_io_start( loop, &ctrl->write_watcher );
|
||||
/* We want to timeout during the first write as well as subsequent ones */
|
||||
ev_timer_again( loop, &ctrl->timeout_watcher );
|
||||
/* We're now interested in events */
|
||||
bitset_enable_stream( ctrl->serve->allocation_map );
|
||||
} else {
|
||||
/* not done yet, so wait another second */
|
||||
ev_timer_again( loop, w );
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void mirror_run( struct server *serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
@@ -727,7 +757,12 @@ void mirror_run( struct server *serve )
|
||||
|
||||
ctrl.ev_loop = EV_DEFAULT;
|
||||
|
||||
/* gcc warns on -O2. clang is fine. Seems to be the fault of ev.h */
|
||||
/* gcc warns with -Wstrict-aliasing on -O2. clang doesn't
|
||||
* implement this warning. Seems to be the fault of ev.h */
|
||||
ev_init( &ctrl.begin_watcher, mirror_begin_cb );
|
||||
ctrl.begin_watcher.repeat = 1.0; // Re-check once a second whether the allocation map has been built
|
||||
ctrl.begin_watcher.data = (void*) &ctrl;
|
||||
|
||||
ev_io_init( &ctrl.read_watcher, mirror_read_cb, m->client, EV_READ );
|
||||
ctrl.read_watcher.data = (void*) &ctrl;
|
||||
|
||||
@@ -735,7 +770,22 @@ void mirror_run( struct server *serve )
|
||||
ctrl.write_watcher.data = (void*) &ctrl;
|
||||
|
||||
ev_init( &ctrl.timeout_watcher, mirror_timeout_cb );
|
||||
ctrl.timeout_watcher.repeat = MS_REQUEST_LIMIT_SECS_F ;
|
||||
|
||||
char * env_request_limit = getenv( "FLEXNBD_MS_REQUEST_LIMIT_SECS" );
|
||||
double timeout_limit = MS_REQUEST_LIMIT_SECS_F;
|
||||
|
||||
if ( NULL != env_request_limit ) {
|
||||
char *endptr = NULL;
|
||||
errno = 0;
|
||||
double limit = strtod( env_request_limit, &endptr );
|
||||
warn( SHOW_ERRNO( "Got %f from strtod", limit ) );
|
||||
|
||||
if ( errno == 0 ) {
|
||||
timeout_limit = limit;
|
||||
}
|
||||
}
|
||||
|
||||
ctrl.timeout_watcher.repeat = timeout_limit;
|
||||
|
||||
ev_init( &ctrl.limit_watcher, mirror_limit_cb );
|
||||
ctrl.limit_watcher.repeat = 1.0; // We check bps every second. seems sane.
|
||||
@@ -751,19 +801,23 @@ void mirror_run( struct server *serve )
|
||||
"Couldn't find first transfer for mirror!"
|
||||
);
|
||||
|
||||
/* Start by writing xfer 0 to the listener */
|
||||
ev_io_start( ctrl.ev_loop, &ctrl.write_watcher );
|
||||
|
||||
/* We want to timeout during the first write as well as subsequent ones */
|
||||
ev_timer_again( ctrl.ev_loop, &ctrl.timeout_watcher );
|
||||
if ( serve->allocation_map_built ) {
|
||||
/* Start by writing xfer 0 to the listener */
|
||||
ev_io_start( ctrl.ev_loop, &ctrl.write_watcher );
|
||||
/* We want to timeout during the first write as well as subsequent ones */
|
||||
ev_timer_again( ctrl.ev_loop, &ctrl.timeout_watcher );
|
||||
bitset_enable_stream( serve->allocation_map );
|
||||
} else {
|
||||
debug( "Waiting for allocation map to be built" );
|
||||
ev_timer_again( ctrl.ev_loop, &ctrl.begin_watcher );
|
||||
}
|
||||
|
||||
/* Everything up to here is blocking. We switch to non-blocking so we
|
||||
* can handle rate-limiting and weird error conditions better. TODO: We
|
||||
* should expand the event loop upwards so we can do the same there too */
|
||||
sock_set_nonblock( m->client, 1 );
|
||||
|
||||
bitset_enable_stream( serve->allocation_map );
|
||||
|
||||
info( "Entering event loop" );
|
||||
ev_run( ctrl.ev_loop, 0 );
|
||||
info( "Exited event loop" );
|
||||
@@ -784,12 +838,11 @@ void mirror_run( struct server *serve )
|
||||
* call retries the migration from scratch. */
|
||||
|
||||
if ( m->commit_state != MS_DONE ) {
|
||||
error( "Event loop exited, but mirroring is not complete" );
|
||||
|
||||
/* mirror_reset will be called before a retry, so keeping hold of events
|
||||
* between now and our next mirroring attempt is not useful
|
||||
*/
|
||||
bitset_disable_stream( serve->allocation_map );
|
||||
error( "Event loop exited, but mirroring is not complete" );
|
||||
}
|
||||
|
||||
return;
|
||||
@@ -869,7 +922,7 @@ void* mirror_runner(void* serve_params_uncast)
|
||||
* for us ). But if we've failed and are going to retry on the next run, we
|
||||
* must close this socket here to have any chance of it succeeding.
|
||||
*/
|
||||
if ( !mirror->client < 0 ) {
|
||||
if ( !(mirror->client < 0) ) {
|
||||
sock_try_close( mirror->client );
|
||||
mirror->client = -1;
|
||||
}
|
||||
@@ -1016,4 +1069,3 @@ void * mirror_super_runner( void * serve_uncast )
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@@ -18,6 +18,18 @@ enum mirror_state;
|
||||
*/
|
||||
#define MS_CONNECT_TIME_SECS 60
|
||||
|
||||
/* MS_CONVERGE_TIME_SECS
|
||||
* The length of time a migration must be estimated to have remaining for us to
|
||||
* disconnect clients for convergence
|
||||
*
|
||||
* TODO: Make this configurable so refusing-to-converge clients can be manually
|
||||
* fixed.
|
||||
* TODO: Make this adaptive - 5 seconds is fine, as long as we can guarantee
|
||||
* that all migrations will be able to converge in time. We'd add a new
|
||||
* state between open and closed, where gradually-increasing latency is
|
||||
* added to client requests to allow the mirror to be faster.
|
||||
*/
|
||||
#define MS_CONVERGE_TIME_SECS 5
|
||||
|
||||
/* MS_HELLO_TIME_SECS
|
||||
* The length of time the sender will wait for the NBD hello message
|
||||
@@ -38,9 +50,12 @@ enum mirror_state;
|
||||
* request, this is the time between the end of the NBD request and the
|
||||
* start of the NBD reply. For a write request, this is the time
|
||||
* between the end of the written data and the start of the NBD reply.
|
||||
* Can be overridden by the environment variable:
|
||||
* FLEXNBD_MS_REQUEST_LIMIT_SECS
|
||||
*/
|
||||
#define MS_REQUEST_LIMIT_SECS 4
|
||||
#define MS_REQUEST_LIMIT_SECS_F 4.0
|
||||
|
||||
#define MS_REQUEST_LIMIT_SECS 60
|
||||
#define MS_REQUEST_LIMIT_SECS_F 60.0
|
||||
|
||||
enum mirror_finish_action {
|
||||
ACTION_EXIT,
|
||||
@@ -122,7 +137,5 @@ struct mirror_super * mirror_super_create(
|
||||
);
|
||||
void * mirror_super_runner( void * serve_uncast );
|
||||
|
||||
uint64_t mirror_current_bps( struct mirror * mirror );
|
||||
|
||||
#endif
|
||||
|
@@ -220,7 +220,6 @@ void read_serve_param( int c, char **ip_addr, char **ip_port, char **file, char
|
||||
case 'h':
|
||||
fprintf(stdout, "%s\n", serve_help_text );
|
||||
exit( 0 );
|
||||
break;
|
||||
case 'l':
|
||||
*ip_addr = optarg;
|
||||
break;
|
||||
@@ -263,7 +262,6 @@ void read_listen_param( int c,
|
||||
case 'h':
|
||||
fprintf(stdout, "%s\n", listen_help_text );
|
||||
exit(0);
|
||||
break;
|
||||
case 'l':
|
||||
*ip_addr = optarg;
|
||||
break;
|
||||
@@ -297,7 +295,6 @@ void read_readwrite_param( int c, char **ip_addr, char **ip_port, char **bind_ad
|
||||
case 'h':
|
||||
fprintf(stdout, "%s\n", err_text );
|
||||
exit( 0 );
|
||||
break;
|
||||
case 'l':
|
||||
*ip_addr = optarg;
|
||||
break;
|
||||
@@ -331,7 +328,6 @@ void read_sock_param( int c, char **sock, char *help_text )
|
||||
case 'h':
|
||||
fprintf( stdout, "%s\n", help_text );
|
||||
exit( 0 );
|
||||
break;
|
||||
case 's':
|
||||
*sock = optarg;
|
||||
break;
|
||||
@@ -362,7 +358,6 @@ void read_mirror_speed_param(
|
||||
case 'h':
|
||||
fprintf( stdout, "%s\n", mirror_speed_help_text );
|
||||
exit( 0 );
|
||||
break;
|
||||
case 's':
|
||||
*sock = optarg;
|
||||
break;
|
||||
@@ -394,7 +389,6 @@ void read_mirror_param(
|
||||
case 'h':
|
||||
fprintf( stdout, "%s\n", mirror_help_text );
|
||||
exit( 0 );
|
||||
break;
|
||||
case 's':
|
||||
*sock = optarg;
|
||||
break;
|
||||
@@ -428,7 +422,6 @@ void read_break_param( int c, char **sock )
|
||||
case 'h':
|
||||
fprintf( stdout, "%s\n", break_help_text );
|
||||
exit( 0 );
|
||||
break;
|
||||
case 's':
|
||||
*sock = optarg;
|
||||
break;
|
||||
@@ -580,7 +573,10 @@ void params_readwrite(
|
||||
|
||||
parse_port( s_port, &out->connect_to.v4 );
|
||||
|
||||
out->from = atol(s_from);
|
||||
long signed_from = atol(s_from);
|
||||
FATAL_IF_NEGATIVE( signed_from,
|
||||
"Can't read from a negative offset %d.", signed_from);
|
||||
out->from = signed_from;
|
||||
|
||||
if (write_not_read) {
|
||||
if (s_length_or_filename[0]-48 < 10) {
|
||||
@@ -592,9 +588,10 @@ void params_readwrite(
|
||||
s_length_or_filename, O_RDONLY);
|
||||
FATAL_IF_NEGATIVE(out->data_fd,
|
||||
"Couldn't open %s", s_length_or_filename);
|
||||
out->len = lseek64(out->data_fd, 0, SEEK_END);
|
||||
FATAL_IF_NEGATIVE(out->len,
|
||||
off64_t signed_len = lseek64(out->data_fd, 0, SEEK_END);
|
||||
FATAL_IF_NEGATIVE(signed_len,
|
||||
"Couldn't find length of %s", s_length_or_filename);
|
||||
out->len = signed_len;
|
||||
FATAL_IF_NEGATIVE(
|
||||
lseek64(out->data_fd, 0, SEEK_SET),
|
||||
"Couldn't rewind %s", s_length_or_filename
|
||||
@@ -787,7 +784,7 @@ int mode_break( int argc, char *argv[] )
|
||||
|
||||
if ( NULL == sock ){
|
||||
fprintf( stderr, "--sock is required.\n" );
|
||||
exit_err( acl_help_text );
|
||||
exit_err( break_help_text );
|
||||
}
|
||||
|
||||
do_remote_command( "break", sock, argc - optind, argv + optind );
|
||||
@@ -808,7 +805,7 @@ int mode_status( int argc, char *argv[] )
|
||||
|
||||
if ( NULL == sock ){
|
||||
fprintf( stderr, "--sock is required.\n" );
|
||||
exit_err( acl_help_text );
|
||||
exit_err( status_help_text );
|
||||
}
|
||||
|
||||
do_remote_command( "status", sock, argc - optind, argv + optind );
|
@@ -78,6 +78,8 @@ struct server * server_create (
|
||||
NULLCHECK( out->close_signal );
|
||||
NULLCHECK( out->acl_updated_signal );
|
||||
|
||||
log_context = s_file;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -233,7 +235,6 @@ int tryjoin_client_thread( struct client_tbl_entry *entry, int (*joinfunc)(pthre
|
||||
|
||||
int was_closed = 0;
|
||||
void * status=NULL;
|
||||
int join_errno;
|
||||
|
||||
if (entry->thread != 0) {
|
||||
char s_client_address[128];
|
||||
@@ -241,7 +242,7 @@ int tryjoin_client_thread( struct client_tbl_entry *entry, int (*joinfunc)(pthre
|
||||
sockaddr_address_string( &entry->address.generic, &s_client_address[0], 128 );
|
||||
|
||||
debug( "%s(%p,...)", joinfunc == pthread_join ? "joining" : "tryjoining", entry->thread );
|
||||
join_errno = joinfunc(entry->thread, &status);
|
||||
int join_errno = joinfunc(entry->thread, &status);
|
||||
|
||||
/* join_errno can legitimately be ESRCH if the thread is
|
||||
* already dead, but the client still needs tidying up. */
|
||||
@@ -256,7 +257,7 @@ int tryjoin_client_thread( struct client_tbl_entry *entry, int (*joinfunc)(pthre
|
||||
debug("nbd thread %016x exited (%s) with status %ld",
|
||||
entry->thread,
|
||||
s_client_address,
|
||||
(uint64_t)status);
|
||||
(uintptr_t)status);
|
||||
client_destroy( entry->client );
|
||||
entry->client = NULL;
|
||||
entry->thread = 0;
|
||||
@@ -423,6 +424,9 @@ void accept_nbd_client(
|
||||
int slot;
|
||||
char s_client_address[64] = {0};
|
||||
|
||||
FATAL_IF_NEGATIVE( sock_set_keepalive_params( client_fd, CLIENT_KEEPALIVE_TIME, CLIENT_KEEPALIVE_INTVL, CLIENT_KEEPALIVE_PROBES),
|
||||
"Error setting keepalive parameters on client socket fd %d", client_fd );
|
||||
|
||||
|
||||
if ( !server_should_accept_client( params, client_address, s_client_address, 64 ) ) {
|
||||
FATAL_IF_NEGATIVE( close( client_fd ),
|
||||
@@ -598,7 +602,6 @@ int server_accept( struct server * params )
|
||||
{
|
||||
NULLCHECK( params );
|
||||
debug("accept loop starting");
|
||||
int client_fd;
|
||||
union mysockaddr client_address;
|
||||
fd_set fds;
|
||||
socklen_t socklen=sizeof(client_address);
|
||||
@@ -638,7 +641,7 @@ int server_accept( struct server * params )
|
||||
}
|
||||
|
||||
if ( FD_ISSET( params->server_fd, &fds ) ){
|
||||
client_fd = accept( params->server_fd, &client_address.generic, &socklen );
|
||||
int client_fd = accept( params->server_fd, &client_address.generic, &socklen );
|
||||
|
||||
if ( params->allow_new_clients ) {
|
||||
debug("Accepted nbd client socket fd %d", client_fd);
|
||||
@@ -686,6 +689,7 @@ void* build_allocation_map_thread(void* serve_uncast)
|
||||
* the future, we'll need to wait for the allocation map to finish or
|
||||
* fail before we can complete the migration.
|
||||
*/
|
||||
serve->allocation_map_not_built = 1;
|
||||
warn( "Didn't build allocation map for %s", serve->filename );
|
||||
}
|
||||
|
||||
@@ -740,11 +744,11 @@ void server_join_clients( struct server * serve ) {
|
||||
|
||||
for (i=0; i < serve->max_nbd_clients; i++) {
|
||||
pthread_t thread_id = serve->nbd_client[i].thread;
|
||||
int err = 0;
|
||||
|
||||
if (thread_id != 0) {
|
||||
debug( "joining thread %p", thread_id );
|
||||
if ( 0 == (err = pthread_join( thread_id, &status ) ) ) {
|
||||
int err = pthread_join( thread_id, &status );
|
||||
if ( 0 == err ) {
|
||||
serve->nbd_client[i].thread = 0;
|
||||
} else {
|
||||
warn( "Error %s (%i) joining thread %p", strerror( err ), err, thread_id );
|
||||
@@ -878,7 +882,19 @@ uint64_t server_mirror_eta( struct server * serve )
|
||||
{
|
||||
if ( server_is_mirroring( serve ) ) {
|
||||
uint64_t bytes_to_xfer = server_mirror_bytes_remaining( serve );
|
||||
return bytes_to_xfer / ( mirror_current_bps( serve->mirror ) + 1 );
|
||||
return bytes_to_xfer / ( server_mirror_bps( serve ) + 1 );
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t server_mirror_bps( struct server * serve )
|
||||
{
|
||||
if ( server_is_mirroring( serve ) ) {
|
||||
uint64_t duration_ms =
|
||||
monotonic_time_ms() - serve->mirror->migration_started;
|
||||
|
||||
return serve->mirror->all_dirty / ( ( duration_ms / 1000 ) + 1 );
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -941,4 +957,3 @@ int do_serve( struct server* params, struct self_pipe * open_signal )
|
||||
|
||||
return success;
|
||||
}
|
||||
|
@@ -21,6 +21,9 @@ struct client_tbl_entry {
|
||||
|
||||
|
||||
#define MAX_NBD_CLIENTS 16
|
||||
#define CLIENT_KEEPALIVE_TIME 30
|
||||
#define CLIENT_KEEPALIVE_INTVL 10
|
||||
#define CLIENT_KEEPALIVE_PROBES 3
|
||||
struct server {
|
||||
/* The flexnbd wrapper this server is attached to */
|
||||
struct flexnbd * flexnbd;
|
||||
@@ -76,8 +79,10 @@ struct server {
|
||||
struct bitset * allocation_map;
|
||||
/* when starting up, this thread builds the allocation_map */
|
||||
pthread_t allocation_map_builder_thread;
|
||||
|
||||
/* when the thread has finished, it sets this to 1 */
|
||||
volatile sig_atomic_t allocation_map_built;
|
||||
volatile sig_atomic_t allocation_map_not_built;
|
||||
|
||||
int max_nbd_clients;
|
||||
struct client_tbl_entry *nbd_client;
|
||||
@@ -126,6 +131,7 @@ int server_is_mirroring( struct server * serve );
|
||||
|
||||
uint64_t server_mirror_bytes_remaining( struct server * serve );
|
||||
uint64_t server_mirror_eta( struct server * serve );
|
||||
uint64_t server_mirror_bps( struct server * serve );
|
||||
|
||||
void server_abandon_mirror( struct server * serve );
|
||||
void server_prevent_mirror_start( struct server *serve );
|
||||
@@ -151,8 +157,10 @@ int do_serve( struct server *, struct self_pipe * );
|
||||
struct mode_readwrite_params {
|
||||
union mysockaddr connect_to;
|
||||
union mysockaddr connect_from;
|
||||
off64_t from;
|
||||
off64_t len;
|
||||
|
||||
uint64_t from;
|
||||
uint32_t len;
|
||||
|
||||
int data_fd;
|
||||
int client;
|
||||
};
|
@@ -27,10 +27,11 @@ struct status * status_create( struct server * serve )
|
||||
status->migration_duration = 0;
|
||||
}
|
||||
status->migration_duration /= 1000;
|
||||
status->migration_speed = serve->mirror->all_dirty / ( status->migration_duration + 1 );
|
||||
status->migration_speed = server_mirror_bps( serve );
|
||||
status->migration_speed_limit = serve->mirror->max_bytes_per_second;
|
||||
|
||||
status->migration_seconds_left = server_mirror_eta( serve );
|
||||
status->migration_bytes_left = server_mirror_bytes_remaining( serve );
|
||||
}
|
||||
|
||||
server_unlock_start_mirror( serve );
|
||||
@@ -60,6 +61,7 @@ int status_write( struct status * status, int fd )
|
||||
PRINT_UINT64( migration_speed );
|
||||
PRINT_UINT64( migration_duration );
|
||||
PRINT_UINT64( migration_seconds_left );
|
||||
PRINT_UINT64( migration_bytes_left );
|
||||
if ( status->migration_speed_limit < UINT64_MAX ) {
|
||||
PRINT_UINT64( migration_speed_limit );
|
||||
};
|
@@ -64,6 +64,8 @@
|
||||
* Our current best estimate of how many seconds are left before the migration
|
||||
* is finished.
|
||||
*
|
||||
* migration_bytes_left:
|
||||
* The number of bytes remaining to migrate.
|
||||
*/
|
||||
|
||||
|
||||
@@ -84,6 +86,7 @@ struct status {
|
||||
uint64_t migration_speed;
|
||||
uint64_t migration_speed_limit;
|
||||
uint64_t migration_seconds_left;
|
||||
uint64_t migration_bytes_left;
|
||||
};
|
||||
|
||||
/** Create a status object for the given server. */
|
@@ -21,6 +21,11 @@ class Environment
|
||||
@fake_pid = nil
|
||||
end
|
||||
|
||||
def prefetch_proxy!
|
||||
@nbd1.prefetch_proxy = true
|
||||
@nbd2.prefetch_proxy = true
|
||||
end
|
||||
|
||||
def proxy1(port=@port2)
|
||||
@nbd1.proxy(@ip, port)
|
||||
end
|
||||
|
@@ -20,7 +20,13 @@ t = Thread.start do
|
||||
client2.close
|
||||
end
|
||||
|
||||
sleep( FlexNBD::MS_REQUEST_LIMIT_SECS + 2 )
|
||||
sleep_time = if ENV.has_key?('FLEXNBD_MS_REQUEST_LIMIT_SECS')
|
||||
ENV['FLEXNBD_MS_REQUEST_LIMIT_SECS'].to_f
|
||||
else
|
||||
FlexNBD::MS_REQUEST_LIMIT_SECS
|
||||
end
|
||||
|
||||
sleep( sleep_time + 2.0 )
|
||||
client1.close
|
||||
|
||||
t.join
|
||||
|
@@ -198,6 +198,8 @@ module FlexNBD
|
||||
end
|
||||
end
|
||||
|
||||
attr_accessor :prefetch_proxy
|
||||
|
||||
def initialize( bin, ip, port )
|
||||
@bin = bin
|
||||
@do_debug = ENV['DEBUG']
|
||||
@@ -208,6 +210,7 @@ module FlexNBD
|
||||
@ip = ip
|
||||
@port = port
|
||||
@kill = []
|
||||
@prefetch_proxy = false
|
||||
end
|
||||
|
||||
|
||||
@@ -247,6 +250,7 @@ module FlexNBD
|
||||
"--port #{port} "\
|
||||
"--conn-addr #{connect_ip} "\
|
||||
"--conn-port #{connect_port} "\
|
||||
"#{prefetch_proxy ? "--cache " : ""}"\
|
||||
"#{@debug}"
|
||||
end
|
||||
|
||||
@@ -458,12 +462,18 @@ module FlexNBD
|
||||
|
||||
def maybe_timeout(cmd, timeout=nil )
|
||||
stdout, stderr = "",""
|
||||
stat = nil
|
||||
run = Proc.new do
|
||||
Open3.popen3( cmd ) do |io_in, io_out, io_err|
|
||||
# Ruby 1.9 changed the popen3 API: instead of 3 args, the block
|
||||
# gets 4. Not only that, but it no longer sets $?, so we have to
|
||||
# go elsewhere for the process' exit status.
|
||||
Open3.popen3( cmd ) do |io_in, io_out, io_err, maybe_thr|
|
||||
io_in.close
|
||||
stdout.replace io_out.read
|
||||
stderr.replace io_err.read
|
||||
stat = maybe_thr.value if maybe_thr
|
||||
end
|
||||
stat ||= $?
|
||||
end
|
||||
|
||||
if timeout
|
||||
@@ -472,13 +482,13 @@ module FlexNBD
|
||||
run.call
|
||||
end
|
||||
|
||||
[stdout, stderr]
|
||||
[stdout, stderr, stat]
|
||||
end
|
||||
|
||||
|
||||
def mirror(dest_ip, dest_port, bandwidth=nil, action=nil)
|
||||
stdout, stderr = mirror_unchecked( dest_ip, dest_port, bandwidth, action )
|
||||
raise IOError.new( "Migrate command failed\n" + stderr) unless $?.success?
|
||||
stdout, stderr, status = mirror_unchecked( dest_ip, dest_port, bandwidth, action )
|
||||
raise IOError.new( "Migrate command failed\n" + stderr) unless status.success?
|
||||
|
||||
stdout
|
||||
end
|
||||
|
@@ -2,6 +2,14 @@
|
||||
|
||||
module FlexNBD
|
||||
|
||||
def self.binary( str )
|
||||
if str.respond_to? :force_encoding
|
||||
str.force_encoding "ASCII-8BIT"
|
||||
else
|
||||
str
|
||||
end
|
||||
end
|
||||
|
||||
# eeevil is his one and only name...
|
||||
def self.read_constants
|
||||
parents = []
|
||||
@@ -17,14 +25,14 @@ module FlexNBD
|
||||
|
||||
fail "No source root!" unless source_root
|
||||
|
||||
headers = Dir[File.join( source_root, "src", "*.h" ) ]
|
||||
headers = Dir[File.join( source_root, "src", "{common,proxy,server}","*.h" ) ]
|
||||
|
||||
headers.each do |header_filename|
|
||||
txt_lines = File.readlines( header_filename )
|
||||
txt_lines.each do |line|
|
||||
if line =~ /^#\s*define\s+([A-Z0-9_]+)\s+(\d+)\s*$/
|
||||
# Bodge until I can figure out what to do with #ifdefs
|
||||
const_set($1, $2.to_i) unless constants.include?( $1 )
|
||||
const_set($1, $2.to_i) unless const_defined?( $1 )
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -33,8 +41,8 @@ module FlexNBD
|
||||
|
||||
read_constants()
|
||||
|
||||
REQUEST_MAGIC = "\x25\x60\x95\x13" unless defined?(REQUEST_MAGIC)
|
||||
REPLY_MAGIC = "\x67\x44\x66\x98" unless defined?(REPLY_MAGIC)
|
||||
REQUEST_MAGIC = binary("\x25\x60\x95\x13") unless defined?(REQUEST_MAGIC)
|
||||
REPLY_MAGIC = binary("\x67\x44\x66\x98") unless defined?(REPLY_MAGIC)
|
||||
|
||||
end # module FlexNBD
|
||||
|
||||
|
@@ -138,7 +138,7 @@ module FlexNBD
|
||||
end
|
||||
|
||||
|
||||
def accept( err_msg = "Timed out waiting for a connection", timeout = 2)
|
||||
def accept( err_msg = "Timed out waiting for a connection", timeout = 5)
|
||||
client_sock = nil
|
||||
|
||||
begin
|
||||
|
194
tests/acceptance/proxy_tests.rb
Normal file
194
tests/acceptance/proxy_tests.rb
Normal file
@@ -0,0 +1,194 @@
|
||||
# encoding: utf-8
|
||||
require 'flexnbd/fake_source'
|
||||
require 'flexnbd/fake_dest'
|
||||
|
||||
module ProxyTests
|
||||
def b
|
||||
"\xFF".b
|
||||
end
|
||||
|
||||
def with_proxied_client( override_size = nil )
|
||||
@env.serve1 unless @server_up
|
||||
@env.proxy2 unless @proxy_up
|
||||
@env.nbd2.can_die(0)
|
||||
client = FlexNBD::FakeSource.new(@env.ip, @env.port2, "Couldn't connect to proxy")
|
||||
begin
|
||||
|
||||
result = client.read_hello
|
||||
assert_equal "NBDMAGIC", result[:magic]
|
||||
assert_equal override_size || @env.file1.size, result[:size]
|
||||
|
||||
yield client
|
||||
ensure
|
||||
client.close rescue nil
|
||||
end
|
||||
end
|
||||
|
||||
def test_exits_with_error_when_cannot_connect_to_upstream_on_start
|
||||
assert_raises(RuntimeError) { @env.proxy1 }
|
||||
end
|
||||
|
||||
def test_read_requests_successfully_proxied
|
||||
with_proxied_client do |client|
|
||||
(0..3).each do |n|
|
||||
offset = n * 4096
|
||||
client.write_read_request(offset, 4096, "myhandle")
|
||||
rsp = client.read_response
|
||||
|
||||
assert_equal ::FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal "myhandle", rsp[:handle]
|
||||
assert_equal 0, rsp[:error]
|
||||
|
||||
orig_data = @env.file1.read(offset, 4096)
|
||||
data = client.read_raw(4096)
|
||||
|
||||
assert_equal 4096, orig_data.size
|
||||
assert_equal 4096, data.size
|
||||
|
||||
assert_equal( orig_data, data,
|
||||
"Returned data does not match on request #{n+1}" )
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def test_write_requests_successfully_proxied
|
||||
with_proxied_client do |client|
|
||||
(0..3).each do |n|
|
||||
offset = n * 4096
|
||||
client.write(offset, b * 4096)
|
||||
rsp = client.read_response
|
||||
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal "myhandle", rsp[:handle]
|
||||
assert_equal 0, rsp[:error]
|
||||
|
||||
data = @env.file1.read(offset, 4096)
|
||||
|
||||
assert_equal( ( b * 4096 ), data, "Data not written correctly (offset is #{n})" )
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def make_fake_server
|
||||
server = FlexNBD::FakeDest.new(@env.ip, @env.port1)
|
||||
@server_up = true
|
||||
|
||||
# We return a thread here because accept() and connect() both block for us
|
||||
Thread.new do
|
||||
sc = server.accept # just tell the supervisor we're up
|
||||
sc.write_hello
|
||||
|
||||
[ server, sc ]
|
||||
end
|
||||
end
|
||||
|
||||
def test_read_request_retried_when_upstream_dies_partway
|
||||
maker = make_fake_server
|
||||
|
||||
with_proxied_client(4096) do |client|
|
||||
server, sc1 = maker.value
|
||||
|
||||
# Send the read request to the proxy
|
||||
client.write_read_request( 0, 4096 )
|
||||
|
||||
# ensure we're given the read request
|
||||
req1 = sc1.read_request
|
||||
assert_equal ::FlexNBD::REQUEST_MAGIC, req1[:magic]
|
||||
assert_equal ::FlexNBD::REQUEST_READ, req1[:type]
|
||||
assert_equal 0, req1[:from]
|
||||
assert_not_equal 0, req1[:len]
|
||||
|
||||
# Kill the server again, now we're sure the read request has been sent once
|
||||
sc1.close
|
||||
|
||||
# We expect the proxy to reconnect without our client doing anything.
|
||||
sc2 = server.accept
|
||||
sc2.write_hello
|
||||
|
||||
# And once reconnected, it should resend an identical request.
|
||||
req2 = sc2.read_request
|
||||
assert_equal req1, req2
|
||||
|
||||
# The reply should be proxied back to the client.
|
||||
sc2.write_reply( req2[:handle] )
|
||||
sc2.write_data( b * 4096 )
|
||||
|
||||
# Check it to make sure it's correct
|
||||
rsp = timeout(15) { client.read_response }
|
||||
assert_equal ::FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
assert_equal req1[:handle], rsp[:handle]
|
||||
|
||||
data = client.read_raw( 4096 )
|
||||
assert_equal( (b * 4096), data, "Wrong data returned" )
|
||||
|
||||
sc2.close
|
||||
server.close
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
def test_write_request_retried_when_upstream_dies_partway
|
||||
maker = make_fake_server
|
||||
|
||||
with_proxied_client(4096) do |client|
|
||||
server, sc1 = maker.value
|
||||
|
||||
# Send the write request to the proxy
|
||||
client.write( 0, ( b * 4096 ) )
|
||||
|
||||
# ensure we're given the write request
|
||||
req1 = sc1.read_request
|
||||
assert_equal ::FlexNBD::REQUEST_MAGIC, req1[:magic]
|
||||
assert_equal ::FlexNBD::REQUEST_WRITE, req1[:type]
|
||||
assert_equal 0, req1[:from]
|
||||
assert_equal 4096, req1[:len]
|
||||
data1 = sc1.read_data( 4096 )
|
||||
assert_equal( ( b * 4096 ), data1, "Data not proxied successfully" )
|
||||
|
||||
# Kill the server again, now we're sure the write request has been sent once
|
||||
sc1.close
|
||||
|
||||
# We expect the proxy to reconnect without our client doing anything.
|
||||
sc2 = server.accept
|
||||
sc2.write_hello
|
||||
|
||||
# And once reconnected, it should resend an identical request.
|
||||
req2 = sc2.read_request
|
||||
assert_equal req1, req2
|
||||
data2 = sc2.read_data( 4096 )
|
||||
assert_equal data1, data2
|
||||
|
||||
# The reply should be proxied back to the client.
|
||||
sc2.write_reply( req2[:handle] )
|
||||
|
||||
# Check it to make sure it's correct
|
||||
rsp = timeout(15) { client.read_response }
|
||||
assert_equal ::FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
assert_equal req1[:handle], rsp[:handle]
|
||||
|
||||
sc2.close
|
||||
server.close
|
||||
end
|
||||
end
|
||||
|
||||
def test_only_one_client_can_connect_to_proxy_at_a_time
|
||||
with_proxied_client do |client|
|
||||
|
||||
c2 = nil
|
||||
assert_raises(Timeout::Error) do
|
||||
timeout(1) do
|
||||
c2 = FlexNBD::FakeSource.new(@env.ip, @env.port2, "Couldn't connect to proxy (2)")
|
||||
c2.read_hello
|
||||
end
|
||||
end
|
||||
c2.close rescue nil if c2
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
@@ -2,12 +2,17 @@
|
||||
|
||||
require 'test/unit'
|
||||
require 'environment'
|
||||
require 'flexnbd/constants'
|
||||
|
||||
class TestHappyPath < Test::Unit::TestCase
|
||||
def setup
|
||||
@env = Environment.new
|
||||
end
|
||||
|
||||
def bin(str)
|
||||
FlexNBD.binary str
|
||||
end
|
||||
|
||||
def teardown
|
||||
@env.nbd1.can_die(0)
|
||||
@env.nbd2.can_die(0)
|
||||
@@ -22,13 +27,13 @@ class TestHappyPath < Test::Unit::TestCase
|
||||
[0, 12, 63].each do |num|
|
||||
|
||||
assert_equal(
|
||||
@env.nbd1.read(num*@env.blocksize, @env.blocksize),
|
||||
@env.file1.read(num*@env.blocksize, @env.blocksize)
|
||||
bin( @env.nbd1.read(num*@env.blocksize, @env.blocksize) ),
|
||||
bin( @env.file1.read(num*@env.blocksize, @env.blocksize) )
|
||||
)
|
||||
end
|
||||
|
||||
[124, 1200, 10028, 25488].each do |num|
|
||||
assert_equal(@env.nbd1.read(num, 4), @env.file1.read(num, 4))
|
||||
assert_equal(bin(@env.nbd1.read(num, 4)), bin(@env.file1.read(num, 4)))
|
||||
end
|
||||
end
|
||||
|
||||
@@ -102,7 +107,7 @@ class TestHappyPath < Test::Unit::TestCase
|
||||
assert_no_match( /unrecognized/, stderr )
|
||||
|
||||
|
||||
Timeout.timeout(2) do @env.nbd1.join end
|
||||
Timeout.timeout(10) do @env.nbd1.join end
|
||||
|
||||
assert !File.file?( @env.filename1 )
|
||||
end
|
||||
@@ -110,6 +115,11 @@ class TestHappyPath < Test::Unit::TestCase
|
||||
|
||||
|
||||
def test_write_to_high_block
|
||||
#
|
||||
# This test does not work on 32 bit platforms.
|
||||
#
|
||||
skip("Not relevant on 32-bit platforms") if ( ["a"].pack("p").size < 8 )
|
||||
|
||||
# Create a large file, then try to write to somewhere after the 2G boundary
|
||||
@env.truncate1 "4G"
|
||||
@env.serve1
|
||||
|
22
tests/acceptance/test_prefetch_proxy_mode.rb
Normal file
22
tests/acceptance/test_prefetch_proxy_mode.rb
Normal file
@@ -0,0 +1,22 @@
|
||||
require 'test/unit'
|
||||
require 'environment'
|
||||
require 'proxy_tests'
|
||||
|
||||
|
||||
class TestPrefetchProxyMode < Test::Unit::TestCase
|
||||
include ProxyTests
|
||||
|
||||
def setup
|
||||
super
|
||||
@env = Environment.new
|
||||
@env.prefetch_proxy!
|
||||
@env.writefile1( "f" * 16 )
|
||||
end
|
||||
|
||||
def teardown
|
||||
@env.cleanup
|
||||
super
|
||||
end
|
||||
end
|
||||
|
||||
|
@@ -1,200 +1,20 @@
|
||||
require 'test/unit'
|
||||
require 'environment'
|
||||
require 'flexnbd/fake_source'
|
||||
require 'flexnbd/fake_dest'
|
||||
require 'proxy_tests'
|
||||
|
||||
|
||||
class TestProxyMode < Test::Unit::TestCase
|
||||
include ProxyTests
|
||||
|
||||
def setup
|
||||
super
|
||||
@env = Environment.new
|
||||
@env.writefile1( "0" * 16 )
|
||||
@env.writefile1( "f" * 16 )
|
||||
end
|
||||
|
||||
def teardown
|
||||
@env.cleanup
|
||||
super
|
||||
end
|
||||
|
||||
def with_proxied_client( override_size = nil )
|
||||
@env.serve1 unless @server_up
|
||||
@env.proxy2 unless @proxy_up
|
||||
@env.nbd2.can_die(0)
|
||||
client = FlexNBD::FakeSource.new(@env.ip, @env.port2, "Couldn't connect to proxy")
|
||||
begin
|
||||
|
||||
result = client.read_hello
|
||||
assert_equal "NBDMAGIC", result[:magic]
|
||||
assert_equal override_size || @env.file1.size, result[:size]
|
||||
|
||||
yield client
|
||||
ensure
|
||||
client.close rescue nil
|
||||
end
|
||||
end
|
||||
|
||||
def test_exits_with_error_when_cannot_connect_to_upstream_on_start
|
||||
assert_raises(RuntimeError) { @env.proxy1 }
|
||||
end
|
||||
|
||||
def test_read_requests_successfully_proxied
|
||||
with_proxied_client do |client|
|
||||
(0..3).each do |n|
|
||||
offset = n * 4096
|
||||
client.write_read_request(offset, 4096, "myhandle")
|
||||
rsp = client.read_response
|
||||
|
||||
assert_equal ::FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal "myhandle", rsp[:handle]
|
||||
assert_equal 0, rsp[:error]
|
||||
|
||||
orig_data = @env.file1.read(offset, 4096)
|
||||
data = client.read_raw(4096)
|
||||
|
||||
assert_equal 4096, orig_data.size
|
||||
assert_equal 4096, data.size
|
||||
|
||||
assert_equal( orig_data, data, "Returned data does not match" )
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def test_write_requests_successfully_proxied
|
||||
with_proxied_client do |client|
|
||||
(0..3).each do |n|
|
||||
offset = n * 4096
|
||||
client.write(offset, "\xFF" * 4096)
|
||||
rsp = client.read_response
|
||||
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal "myhandle", rsp[:handle]
|
||||
assert_equal 0, rsp[:error]
|
||||
|
||||
data = @env.file1.read(offset, 4096)
|
||||
|
||||
assert_equal( ( "\xFF" * 4096 ), data, "Data not written correctly (offset is #{n})" )
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def make_fake_server
|
||||
server = FlexNBD::FakeDest.new(@env.ip, @env.port1)
|
||||
@server_up = true
|
||||
|
||||
# We return a thread here because accept() and connect() both block for us
|
||||
Thread.new do
|
||||
sc = server.accept # just tell the supervisor we're up
|
||||
sc.write_hello
|
||||
|
||||
[ server, sc ]
|
||||
end
|
||||
end
|
||||
|
||||
def test_read_request_retried_when_upstream_dies_partway
|
||||
maker = make_fake_server
|
||||
|
||||
with_proxied_client(4096) do |client|
|
||||
server, sc1 = maker.value
|
||||
|
||||
# Send the read request to the proxy
|
||||
client.write_read_request( 0, 4096 )
|
||||
|
||||
# ensure we're given the read request
|
||||
req1 = sc1.read_request
|
||||
assert_equal ::FlexNBD::REQUEST_MAGIC, req1[:magic]
|
||||
assert_equal ::FlexNBD::REQUEST_READ, req1[:type]
|
||||
assert_equal 0, req1[:from]
|
||||
assert_not_equal 0, req1[:len]
|
||||
|
||||
# Kill the server again, now we're sure the read request has been sent once
|
||||
sc1.close
|
||||
|
||||
# We expect the proxy to reconnect without our client doing anything.
|
||||
sc2 = server.accept
|
||||
sc2.write_hello
|
||||
|
||||
# And once reconnected, it should resend an identical request.
|
||||
req2 = sc2.read_request
|
||||
assert_equal req1, req2
|
||||
|
||||
# The reply should be proxied back to the client.
|
||||
sc2.write_reply( req2[:handle] )
|
||||
sc2.write_data( "\xFF" * 4096 )
|
||||
|
||||
# Check it to make sure it's correct
|
||||
rsp = timeout(15) { client.read_response }
|
||||
assert_equal ::FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
assert_equal req1[:handle], rsp[:handle]
|
||||
|
||||
data = client.read_raw( 4096 )
|
||||
assert_equal( ("\xFF" * 4096), data, "Wrong data returned" )
|
||||
|
||||
sc2.close
|
||||
server.close
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
def test_write_request_retried_when_upstream_dies_partway
|
||||
maker = make_fake_server
|
||||
|
||||
with_proxied_client(4096) do |client|
|
||||
server, sc1 = maker.value
|
||||
|
||||
# Send the read request to the proxy
|
||||
client.write( 0, ( "\xFF" * 4096 ) )
|
||||
|
||||
# ensure we're given the read request
|
||||
req1 = sc1.read_request
|
||||
assert_equal ::FlexNBD::REQUEST_MAGIC, req1[:magic]
|
||||
assert_equal ::FlexNBD::REQUEST_WRITE, req1[:type]
|
||||
assert_equal 0, req1[:from]
|
||||
assert_equal 4096, req1[:len]
|
||||
data1 = sc1.read_data( 4096 )
|
||||
assert_equal( ( "\xFF" * 4096 ), data1, "Data not proxied successfully" )
|
||||
|
||||
# Kill the server again, now we're sure the read request has been sent once
|
||||
sc1.close
|
||||
|
||||
# We expect the proxy to reconnect without our client doing anything.
|
||||
sc2 = server.accept
|
||||
sc2.write_hello
|
||||
|
||||
# And once reconnected, it should resend an identical request.
|
||||
req2 = sc2.read_request
|
||||
assert_equal req1, req2
|
||||
data2 = sc2.read_data( 4096 )
|
||||
assert_equal data1, data2
|
||||
|
||||
# The reply should be proxied back to the client.
|
||||
sc2.write_reply( req2[:handle] )
|
||||
|
||||
# Check it to make sure it's correct
|
||||
rsp = timeout(15) { client.read_response }
|
||||
assert_equal ::FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
assert_equal req1[:handle], rsp[:handle]
|
||||
|
||||
sc2.close
|
||||
server.close
|
||||
end
|
||||
end
|
||||
|
||||
def test_only_one_client_can_connect_to_proxy_at_a_time
|
||||
with_proxied_client do |client|
|
||||
|
||||
c2 = nil
|
||||
assert_raises(Timeout::Error) do
|
||||
timeout(1) do
|
||||
c2 = FlexNBD::FakeSource.new(@env.ip, @env.port2, "Couldn't connect to proxy (2)")
|
||||
c2.read_hello
|
||||
end
|
||||
end
|
||||
c2.close rescue nil if c2
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
@@ -6,6 +6,7 @@ class TestServeMode < Test::Unit::TestCase
|
||||
|
||||
def setup
|
||||
super
|
||||
@b = "\xFF".b
|
||||
@env = Environment.new
|
||||
@env.writefile1( "0" )
|
||||
@env.serve1
|
||||
@@ -53,18 +54,18 @@ class TestServeMode < Test::Unit::TestCase
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
|
||||
client.write( 0, "\xFF" )
|
||||
client.write( 0, @b )
|
||||
rsp = client.read_response
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
|
||||
client.write( 0, "\xFF\xFF" )
|
||||
client.write( 0, @b * 2 )
|
||||
rsp = client.read_response
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
end
|
||||
|
||||
assert_equal "\xFF\xFF", @env.file1.read( 0, 2 )
|
||||
assert_equal @b * 2, @env.file1.read( 0, 2 )
|
||||
end
|
||||
|
||||
|
||||
|
@@ -7,6 +7,9 @@ require 'environment'
|
||||
class TestSourceErrorHandling < Test::Unit::TestCase
|
||||
|
||||
def setup
|
||||
@old_env = ENV['FLEXNBD_MS_REQUEST_LIMIT_SECS']
|
||||
ENV['FLEXNBD_MS_REQUEST_LIMIT_SECS'] = "4.0"
|
||||
|
||||
@env = Environment.new
|
||||
@env.writefile1( "f" * 4 )
|
||||
@env.serve1
|
||||
@@ -16,6 +19,7 @@ class TestSourceErrorHandling < Test::Unit::TestCase
|
||||
def teardown
|
||||
@env.nbd1.can_die(0)
|
||||
@env.cleanup
|
||||
ENV['FLEXNBD_MS_REQUEST_LIMIT_SECS'] = @old_env
|
||||
end
|
||||
|
||||
|
||||
|
@@ -10,7 +10,7 @@
|
||||
START_TEST(test_bit_set)
|
||||
{
|
||||
uint64_t num = 0;
|
||||
char *bits = (char*) #
|
||||
bitfield_p bits = (bitfield_p) #
|
||||
|
||||
#define TEST_BIT_SET(bit, newvalue) \
|
||||
bit_set(bits, (bit)); \
|
||||
@@ -27,7 +27,7 @@ END_TEST
|
||||
START_TEST(test_bit_clear)
|
||||
{
|
||||
uint64_t num = 0xffffffffffffffff;
|
||||
char *bits = (char*) #
|
||||
bitfield_p bits = (bitfield_p) #
|
||||
|
||||
#define TEST_BIT_CLEAR(bit, newvalue) \
|
||||
bit_clear(bits, (bit)); \
|
||||
@@ -44,7 +44,7 @@ END_TEST
|
||||
START_TEST(test_bit_tests)
|
||||
{
|
||||
uint64_t num = 0x5555555555555555;
|
||||
char *bits = (char*) #
|
||||
bitfield_p bits = (bitfield_p) #
|
||||
|
||||
fail_unless(bit_has_value(bits, 0, 1), "bit_has_value malfunction");
|
||||
fail_unless(bit_has_value(bits, 1, 0), "bit_has_value malfunction");
|
||||
@@ -58,8 +58,8 @@ END_TEST
|
||||
|
||||
START_TEST(test_bit_ranges)
|
||||
{
|
||||
char buffer[4160];
|
||||
uint64_t *longs = (unsigned long*) buffer;
|
||||
bitfield_word_t buffer[BIT_WORDS_FOR_SIZE(4160)];
|
||||
uint64_t *longs = (uint64_t *) buffer;
|
||||
uint64_t i;
|
||||
|
||||
memset(buffer, 0, 4160);
|
||||
@@ -67,9 +67,9 @@ START_TEST(test_bit_ranges)
|
||||
for (i=0; i<64; i++) {
|
||||
bit_set_range(buffer, i*64, i);
|
||||
fail_unless(
|
||||
longs[i] == (1UL<<i)-1,
|
||||
longs[i] == (1ULL<<i)-1,
|
||||
"longs[%ld] = %lx SHOULD BE %lx",
|
||||
i, longs[i], (1L<<i)-1
|
||||
i, longs[i], (1ULL<<i)-1
|
||||
);
|
||||
|
||||
fail_unless(longs[i+1] == 0, "bit_set_range overshot at i=%d", i);
|
||||
@@ -84,7 +84,7 @@ END_TEST
|
||||
|
||||
START_TEST(test_bit_runs)
|
||||
{
|
||||
char buffer[256];
|
||||
bitfield_word_t buffer[BIT_WORDS_FOR_SIZE(256)];
|
||||
int i, ptr=0, runs[] = {
|
||||
56,97,22,12,83,1,45,80,85,51,64,40,63,67,75,64,94,81,79,62
|
||||
};
|
||||
|
@@ -76,8 +76,8 @@ START_TEST( test_read_request_quits_on_stop_signal )
|
||||
|
||||
client_signal_stop( c );
|
||||
|
||||
int client_read_request( struct client *, struct nbd_request *);
|
||||
fail_unless( 0 == client_read_request( c, &nbdr ), "Didn't quit on stop." );
|
||||
int client_serve_request( struct client *);
|
||||
fail_unless( 1 == client_serve_request( c ), "Didn't quit on stop." );
|
||||
|
||||
close( fds[0] );
|
||||
close( fds[1] );
|
||||
|
@@ -66,9 +66,9 @@ START_TEST( test_receive_blocks_until_post )
|
||||
END_TEST
|
||||
|
||||
|
||||
Suite* acl_suite(void)
|
||||
Suite* mbox_suite(void)
|
||||
{
|
||||
Suite *s = suite_create("acl");
|
||||
Suite *s = suite_create("mbox");
|
||||
TCase *tc_create = tcase_create("create");
|
||||
TCase *tc_post = tcase_create("post");
|
||||
|
||||
@@ -93,7 +93,7 @@ int main(void)
|
||||
log_level = 2;
|
||||
#endif
|
||||
int number_failed;
|
||||
Suite *s = acl_suite();
|
||||
Suite *s = mbox_suite();
|
||||
SRunner *sr = srunner_create(s);
|
||||
srunner_run_all(sr, CK_NORMAL);
|
||||
log_level = 0;
|
||||
|
@@ -88,14 +88,14 @@ START_TEST(test_request_handle)
|
||||
struct nbd_request_raw request_raw;
|
||||
struct nbd_request request;
|
||||
|
||||
memcpy( request_raw.handle, "MYHANDLE", 8 );
|
||||
memcpy( request_raw.handle.b, "MYHANDLE", 8 );
|
||||
|
||||
nbd_r2h_request( &request_raw, &request );
|
||||
memset( request_raw.handle, 0, 8 );
|
||||
request_raw.handle.w = 0;
|
||||
nbd_h2r_request( &request, &request_raw );
|
||||
|
||||
fail_unless( memcmp( request.handle, "MYHANDLE", 8 ) == 0, "The handle was not copied." );
|
||||
fail_unless( memcmp( request_raw.handle, "MYHANDLE", 8 ) == 0, "The handle was not copied back." );
|
||||
fail_unless( memcmp( request.handle.b, "MYHANDLE", 8 ) == 0, "The handle was not copied." );
|
||||
fail_unless( memcmp( request_raw.handle.b, "MYHANDLE", 8 ) == 0, "The handle was not copied back." );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
@@ -170,14 +170,14 @@ START_TEST(test_reply_handle)
|
||||
struct nbd_reply_raw reply_raw;
|
||||
struct nbd_reply reply;
|
||||
|
||||
memcpy( reply_raw.handle, "MYHANDLE", 8 );
|
||||
memcpy( reply_raw.handle.b, "MYHANDLE", 8 );
|
||||
|
||||
nbd_r2h_reply( &reply_raw, &reply );
|
||||
memset( reply_raw.handle, 0, 8 );
|
||||
reply_raw.handle.w = 0;
|
||||
nbd_h2r_reply( &reply, &reply_raw );
|
||||
|
||||
fail_unless( memcmp( reply.handle, "MYHANDLE", 8 ) == 0, "The handle was not copied." );
|
||||
fail_unless( memcmp( reply_raw.handle, "MYHANDLE", 8 ) == 0, "The handle was not copied back." );
|
||||
fail_unless( memcmp( reply.handle.b, "MYHANDLE", 8 ) == 0, "The handle was not copied." );
|
||||
fail_unless( memcmp( reply_raw.handle.b, "MYHANDLE", 8 ) == 0, "The handle was not copied back." );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
@@ -188,14 +188,15 @@ START_TEST( test_convert_from )
|
||||
* nbd_request_raw */
|
||||
struct nbd_request_raw request_raw;
|
||||
struct nbd_request request;
|
||||
char readbuf[] = {0x80, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
memcpy( &request_raw.from, readbuf, 8 );
|
||||
uint64_t target = 0x8000000000000000;
|
||||
|
||||
/* this is stored big-endian */
|
||||
request_raw.from = htobe64(target);
|
||||
|
||||
/* We expect this to convert big-endian to the host format */
|
||||
nbd_r2h_request( &request_raw, &request );
|
||||
|
||||
uint64_t target = 1;
|
||||
target <<= 63;
|
||||
fail_unless( target == request.from, "from was wrong" );
|
||||
}
|
||||
END_TEST
|
||||
|
@@ -22,7 +22,7 @@
|
||||
|
||||
|
||||
int fd_read_request( int, struct nbd_request_raw *);
|
||||
int fd_write_reply( int, char *, int );
|
||||
int fd_write_reply( int, uint64_t, int );
|
||||
|
||||
int marker;
|
||||
|
||||
@@ -46,8 +46,7 @@ void * responder( void *respond_uncast )
|
||||
struct respond * resp = (struct respond *) respond_uncast;
|
||||
int sock_fd = resp->sock_fds[1];
|
||||
struct nbd_request_raw request_raw;
|
||||
char wrong_handle[] = "WHOOPSIE";
|
||||
|
||||
uint64_t wrong_handle = 0x80;
|
||||
|
||||
if( fd_read_request( sock_fd, &request_raw ) == -1){
|
||||
fprintf(stderr, "Problem with fd_read_request\n");
|
||||
@@ -57,7 +56,7 @@ void * responder( void *respond_uncast )
|
||||
fd_write_reply( sock_fd, wrong_handle, 0 );
|
||||
}
|
||||
else {
|
||||
fd_write_reply( sock_fd, resp->received.handle, 0 );
|
||||
fd_write_reply( sock_fd, resp->received.handle.w, 0 );
|
||||
}
|
||||
write( sock_fd, "12345678", 8 );
|
||||
}
|
||||
@@ -150,7 +149,7 @@ END_TEST
|
||||
|
||||
Suite* readwrite_suite(void)
|
||||
{
|
||||
Suite *s = suite_create("acl");
|
||||
Suite *s = suite_create("readwrite");
|
||||
TCase *tc_transfer = tcase_create("entrust");
|
||||
TCase *tc_disconnect = tcase_create("disconnect");
|
||||
|
||||
|
@@ -93,7 +93,7 @@ END_TEST
|
||||
|
||||
int connect_client( char *addr, int actual_port, char *source_addr )
|
||||
{
|
||||
int client_fd;
|
||||
int client_fd = -1;
|
||||
|
||||
struct addrinfo hint;
|
||||
struct addrinfo *ailist, *aip;
|
||||
|
@@ -72,9 +72,11 @@ START_TEST( test_sockaddr_address_string_doesnt_overflow_short_buffer )
|
||||
char testbuf[128];
|
||||
const char* result;
|
||||
|
||||
memset( testbuf, 0, 128 );
|
||||
v4->sin_family = AF_INET;
|
||||
v4->sin_port = htons( 4777 );
|
||||
ck_assert_int_eq( 1, inet_pton( AF_INET, "192.168.0.1", &v4->sin_addr ));
|
||||
memset( &testbuf, 0, 128 );
|
||||
|
||||
result = sockaddr_address_string( &sa, &testbuf[0], 2 );
|
||||
ck_assert( result == NULL );
|
||||
|
@@ -308,6 +308,7 @@ START_TEST( test_renders_migration_statistics )
|
||||
status.migration_speed = 40000000;
|
||||
status.migration_speed_limit = 40000001;
|
||||
status.migration_seconds_left = 1;
|
||||
status.migration_bytes_left = 5000;
|
||||
|
||||
status_write( &status, fds[1] );
|
||||
fail_if_rendered( fds[0], "migration_duration" );
|
||||
@@ -335,6 +336,9 @@ START_TEST( test_renders_migration_statistics )
|
||||
status_write( &status, fds[1] );
|
||||
fail_unless_rendered( fds[0], "migration_seconds_left=1" );
|
||||
|
||||
status_write( &status, fds[1] );
|
||||
fail_unless_rendered( fds[0], "migration_bytes_left=5000" );
|
||||
|
||||
status.migration_speed_limit = UINT64_MAX;
|
||||
|
||||
status_write( &status, fds[1] );
|
||||
|
@@ -71,11 +71,12 @@ START_TEST( test_fatal_kills_process )
|
||||
sleep(10);
|
||||
}
|
||||
else {
|
||||
int kidstatus;
|
||||
int result;
|
||||
result = waitpid( pid, &kidstatus, 0 );
|
||||
int kidret, kidstatus, result;
|
||||
result = waitpid( pid, &kidret, 0 );
|
||||
fail_if( result < 0, "Wait failed." );
|
||||
fail_unless( kidstatus == 6, "Kid was not aborted." );
|
||||
fail_unless( WIFSIGNALED( kidret ), "Process didn't exit via signal" );
|
||||
kidstatus = WTERMSIG( kidret );
|
||||
ck_assert_int_eq( kidstatus, SIGABRT );
|
||||
}
|
||||
|
||||
}
|
||||
@@ -140,9 +141,9 @@ START_TEST( test_fatal_doesnt_call_handler )
|
||||
END_TEST
|
||||
|
||||
|
||||
Suite* error_suite(void)
|
||||
Suite* util_suite(void)
|
||||
{
|
||||
Suite *s = suite_create("error");
|
||||
Suite *s = suite_create("util");
|
||||
TCase *tc_process = tcase_create("process");
|
||||
TCase *tc_handler = tcase_create("handler");
|
||||
|
||||
@@ -162,7 +163,7 @@ Suite* error_suite(void)
|
||||
int main(void)
|
||||
{
|
||||
int number_failed;
|
||||
Suite *s = error_suite();
|
||||
Suite *s = util_suite();
|
||||
SRunner *sr = srunner_create(s);
|
||||
srunner_run_all(sr, CK_NORMAL);
|
||||
number_failed = srunner_ntests_failed(sr);
|
||||
|
Reference in New Issue
Block a user