Compare commits
327 Commits
0.0.2
...
debian/0.1
Author | SHA1 | Date | |
---|---|---|---|
![]() |
c3b241464a | ||
![]() |
4f956e4b9d | ||
![]() |
b4cb2d9240 | ||
![]() |
1efb7bada6 | ||
![]() |
6bc2a4c0b9 | ||
![]() |
59de76c50c | ||
![]() |
209da655b3 | ||
![]() |
52b45e6b40 | ||
![]() |
d279eb7570 | ||
![]() |
c07df76ede | ||
![]() |
e7e99b099c | ||
![]() |
b2edd0734a | ||
![]() |
e19d005636 | ||
![]() |
d1e6e835c4 | ||
![]() |
8fed794fe7 | ||
![]() |
e24efa9864 | ||
![]() |
3134d619ef | ||
![]() |
898f3f6c7e | ||
![]() |
5a1bc21088 | ||
![]() |
deb8f2c53b | ||
![]() |
1338d9e910 | ||
![]() |
47c05174b6 | ||
![]() |
191b3bc72c | ||
![]() |
770ca0d0e5 | ||
![]() |
6505588f25 | ||
![]() |
957707bcfc | ||
![]() |
3f01b77221 | ||
![]() |
0dbea7f8fe | ||
![]() |
091aacd16d | ||
![]() |
04b6637451 | ||
![]() |
7d2eda6cea | ||
![]() |
7e152ca4f2 | ||
![]() |
fe0125efbc | ||
![]() |
ebaaa6d671 | ||
![]() |
8cc8588744 | ||
![]() |
5da77ea39a | ||
![]() |
a744965c67 | ||
![]() |
d07659f694 | ||
![]() |
30562ed900 | ||
![]() |
93c0fa2e92 | ||
![]() |
8dc491fb89 | ||
![]() |
ea7cd64fc2 | ||
![]() |
35d3340708 | ||
![]() |
d47a44a204 | ||
![]() |
d6968d8242 | ||
![]() |
bf85e329a0 | ||
![]() |
edcaef532c | ||
![]() |
cb920e4e9d | ||
![]() |
91d85633b6 | ||
![]() |
7c516b85a6 | ||
![]() |
679fa6dbf8 | ||
![]() |
50708326ec | ||
![]() |
d907025d71 | ||
![]() |
e4d398a078 | ||
![]() |
8de0780125 | ||
![]() |
0fd16822ea | ||
![]() |
1e3c61b541 | ||
![]() |
a09e14b2d4 | ||
![]() |
a6710b6c32 | ||
![]() |
ed3995303f | ||
![]() |
f5de8fb12b | ||
![]() |
99a5f79a52 | ||
![]() |
356e1fd6a1 | ||
![]() |
67dcea207d | ||
![]() |
d3762162db | ||
![]() |
3571d3f82e | ||
![]() |
4cd7e764bb | ||
![]() |
4f535fbb02 | ||
![]() |
218c55fb63 | ||
![]() |
956a602475 | ||
![]() |
26a0a82f9d | ||
![]() |
76e0476113 | ||
![]() |
d9651a038c | ||
![]() |
fcd3d33498 | ||
![]() |
e3360a3a1b | ||
![]() |
1fefe1a669 | ||
![]() |
4ed8d49b2c | ||
![]() |
3af0e84f5f | ||
![]() |
ba14943b60 | ||
![]() |
4a709e73f8 | ||
![]() |
91a8946ddc | ||
![]() |
20f99b4554 | ||
![]() |
c363991cfd | ||
![]() |
c41eeff2fc | ||
![]() |
5960e4d10b | ||
![]() |
f0911b5c6c | ||
![]() |
b063f41ba8 | ||
![]() |
28c7e43e45 | ||
![]() |
9326b6b882 | ||
![]() |
f93476ebd3 | ||
![]() |
666b60ae1c | ||
![]() |
f48bf2b296 | ||
![]() |
705164ae3b | ||
![]() |
dbe7053bf3 | ||
![]() |
fa8023cf69 | ||
![]() |
aba802d415 | ||
![]() |
d146102c2c | ||
![]() |
5551373073 | ||
![]() |
77f333423b | ||
![]() |
ffa45879d7 | ||
![]() |
2fa1ce8e6b | ||
![]() |
6f540ce238 | ||
![]() |
f9a3447bc9 | ||
![]() |
7806ec11ee | ||
![]() |
1817c13acb | ||
![]() |
97c8d7a358 | ||
![]() |
8cf92af900 | ||
![]() |
5185be39c9 | ||
![]() |
374b4c616e | ||
![]() |
50ec8fb7cc | ||
![]() |
5fc9ad6fd8 | ||
![]() |
85c463c4bd | ||
![]() |
278a3151a8 | ||
![]() |
0ea66b1e04 | ||
![]() |
83e3d65be9 | ||
![]() |
4f31bd9340 | ||
![]() |
0baf93fd7b | ||
![]() |
175f19b3e7 | ||
![]() |
8d56316548 | ||
![]() |
27f2cc7083 | ||
![]() |
8084a41ad2 | ||
![]() |
5ca5858929 | ||
![]() |
afcc07a181 | ||
![]() |
dcead04cf6 | ||
![]() |
4f7f5f1745 | ||
![]() |
976e9ba07f | ||
![]() |
91d9531a60 | ||
![]() |
905d66af77 | ||
![]() |
eee7c9644c | ||
![]() |
ce5c51cdcf | ||
![]() |
c6c53c63ba | ||
![]() |
20bd58749e | ||
![]() |
866bf835e6 | ||
![]() |
53cbe14556 | ||
![]() |
cd3281f62d | ||
![]() |
1e5457fed0 | ||
![]() |
0753369b77 | ||
![]() |
9d9ae40953 | ||
![]() |
65d4f581b9 | ||
![]() |
77c71ccf09 | ||
![]() |
97a923afdf | ||
![]() |
335261869d | ||
![]() |
8cf9cae8c0 | ||
![]() |
6986c70888 | ||
![]() |
4b9ded0e1d | ||
![]() |
b177faacd6 | ||
![]() |
96e60a4a29 | ||
![]() |
d87af93cec | ||
![]() |
bc50532321 | ||
![]() |
22f92c5df0 | ||
![]() |
78fc65c515 | ||
![]() |
5c1b119f83 | ||
![]() |
f4793c7059 | ||
![]() |
0f0697a0aa | ||
![]() |
e98c2f2f05 | ||
![]() |
ebe6c4a8ab | ||
![]() |
847b2ec9ad | ||
![]() |
ca9aea0d13 | ||
![]() |
0ae249009c | ||
![]() |
0f2225becf | ||
![]() |
a6c175ed1d | ||
![]() |
94654419c5 | ||
![]() |
e161121c7a | ||
![]() |
150e506780 | ||
![]() |
9a3106f946 | ||
![]() |
71036730c4 | ||
![]() |
6553907972 | ||
![]() |
9770bbe42b | ||
![]() |
6ffa10bf89 | ||
![]() |
eb80c0d235 | ||
![]() |
a5c296f948 | ||
![]() |
77a66c85a0 | ||
![]() |
0172eb1cba | ||
![]() |
c3a5eb0600 | ||
![]() |
0a029fbbf5 | ||
![]() |
83426e1c01 | ||
![]() |
86a000c717 | ||
![]() |
54a41aacdf | ||
![]() |
487bef1f40 | ||
![]() |
0494295705 | ||
![]() |
14fde0f2a1 | ||
![]() |
e13d1d8fb4 | ||
![]() |
efdd613968 | ||
![]() |
d0022402ae | ||
![]() |
28fff91af1 | ||
![]() |
385c9027db | ||
![]() |
b73081e417 | ||
![]() |
cc468b0b17 | ||
![]() |
7128fcc901 | ||
![]() |
45355666f7 | ||
![]() |
8a294e5ee0 | ||
![]() |
c6764b0de1 | ||
![]() |
41facd2ccf | ||
![]() |
f6456349f7 | ||
![]() |
9f4fbe782c | ||
![]() |
8c750a5e9d | ||
![]() |
64702d992d | ||
![]() |
c2df38c9d3 | ||
![]() |
754949d43f | ||
![]() |
1a966ca0be | ||
![]() |
f590f8ed3c | ||
![]() |
bc9ce93648 | ||
![]() |
a5870b8e9b | ||
![]() |
bed8959d47 | ||
![]() |
5c59a412af | ||
![]() |
253cee5a10 | ||
![]() |
7de22a385e | ||
![]() |
14db3315ca | ||
![]() |
efe9eaef7c | ||
![]() |
f8fd4e0437 | ||
![]() |
9a37951aaa | ||
![]() |
d18423c153 | ||
![]() |
1b0fe24529 | ||
![]() |
5c5636b053 | ||
![]() |
afe76debf7 | ||
![]() |
f4bfc70a4b | ||
![]() |
b29ef6d4de | ||
![]() |
dee0bb27d6 | ||
![]() |
f556f298b1 | ||
![]() |
55b452ebef | ||
![]() |
9f34752842 | ||
![]() |
81d41f567d | ||
![]() |
89fd18f6f0 | ||
![]() |
3c56ba0af6 | ||
![]() |
2a9884e9e9 | ||
![]() |
1afea5c73d | ||
![]() |
62bdad2a6e | ||
![]() |
cd0a1f905f | ||
![]() |
2156d06368 | ||
![]() |
b14bba36ec | ||
![]() |
f5c434f21c | ||
![]() |
662b9c2d07 | ||
![]() |
197c1131bf | ||
![]() |
cecf2ebc77 | ||
![]() |
f7e5353355 | ||
![]() |
f9fe421472 | ||
![]() |
1b6c10926f | ||
![]() |
24858fcde5 | ||
![]() |
26c7f1b1c4 | ||
![]() |
055836c8cb | ||
![]() |
76cf2dc7b9 | ||
![]() |
a5a7d45355 | ||
![]() |
e548cc53c8 | ||
![]() |
151b739e8d | ||
![]() |
d9b3aab972 | ||
![]() |
574d44f17f | ||
![]() |
33ee19dc5a | ||
![]() |
4e70db8d7f | ||
![]() |
6984d3709e | ||
![]() |
2bb8434128 | ||
![]() |
e994b80756 | ||
![]() |
5257e93cb7 | ||
![]() |
21ac3cd0ed | ||
![]() |
f89352aa28 | ||
![]() |
1d9f055dc7 | ||
![]() |
e659a78855 | ||
![]() |
78299de299 | ||
![]() |
6842864e74 | ||
![]() |
98d8fbeaf0 | ||
![]() |
9b67d30608 | ||
![]() |
63f7e3e8d4 | ||
![]() |
9826dc6c65 | ||
![]() |
0324d3000d | ||
![]() |
91085b87fc | ||
![]() |
dfa7e1a21b | ||
![]() |
8281809f42 | ||
![]() |
03bc12dd57 | ||
![]() |
58c4a9530b | ||
![]() |
cb7eed28e7 | ||
![]() |
ac560bd907 | ||
![]() |
0fcbe04f80 | ||
![]() |
f63be84d80 | ||
![]() |
8c04564645 | ||
![]() |
ecfd108a53 | ||
![]() |
56ce7d35c2 | ||
![]() |
2dd3db95bc | ||
![]() |
184a13bc9f | ||
![]() |
0b3a71bb03 | ||
![]() |
719bd30071 | ||
![]() |
1afba29b63 | ||
![]() |
7583ffbc4d | ||
![]() |
f002b8ca1f | ||
![]() |
00d7237f66 | ||
![]() |
ed70dacf2f | ||
![]() |
4f650d85c2 | ||
![]() |
dcef6d29e5 | ||
![]() |
22bea81445 | ||
![]() |
83eb31aba4 | ||
![]() |
161d2fccf1 | ||
![]() |
029ebb5ef4 | ||
![]() |
a039ceffcb | ||
![]() |
062ecca1fd | ||
![]() |
cf62b10adf | ||
![]() |
a49cf14927 | ||
![]() |
7b13964c39 | ||
![]() |
1fa8ba82a5 | ||
![]() |
f3e0d61323 | ||
![]() |
32cae67a75 | ||
![]() |
ccbfce1075 | ||
![]() |
ddc57e76d1 | ||
![]() |
1d9c88d4ca | ||
![]() |
8b43321ef2 | ||
![]() |
13328910c8 | ||
![]() |
50001cd6e7 | ||
![]() |
ccf5baa956 | ||
![]() |
ee652a2965 | ||
![]() |
e724d83bec | ||
![]() |
239136064a | ||
![]() |
c3c621f750 | ||
![]() |
c5dfe16f35 | ||
![]() |
b1a4db2727 | ||
![]() |
2c0f86c018 | ||
![]() |
53eca40fad | ||
![]() |
33f95e1986 | ||
![]() |
fd935ce4c9 | ||
![]() |
f6f4266fd6 | ||
![]() |
4790912750 | ||
![]() |
77f4ac29c6 | ||
![]() |
b0f1a027c6 | ||
![]() |
76bbdb4889 | ||
![]() |
314c0c2a2a | ||
![]() |
1caa3d4e27 | ||
![]() |
2e20e7197a | ||
![]() |
8814894874 | ||
![]() |
66ff06fe0e | ||
![]() |
db30ea0c48 | ||
![]() |
9a81af5f8f |
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
**/*.o
|
||||
**/*~
|
||||
flexnbd
|
||||
build/
|
||||
pkg/
|
||||
**/*.orig
|
||||
**/.*.swp
|
||||
cscope.out
|
||||
valgrind.out
|
10
.gitlab-ci.yml
Normal file
10
.gitlab-ci.yml
Normal file
@@ -0,0 +1,10 @@
|
||||
image: "ruby:2.1"
|
||||
|
||||
before_script:
|
||||
- apt-get update; apt-get install -y check libev-dev net-tools dpkg-dev
|
||||
|
||||
unit_test:
|
||||
script:
|
||||
- make clean
|
||||
- make build
|
||||
- make test
|
@@ -1,9 +0,0 @@
|
||||
.o$
|
||||
~$
|
||||
^flexnbd$
|
||||
^build/
|
||||
^pkg/
|
||||
\.orig$
|
||||
.*\.swp$
|
||||
cscope.out$
|
||||
valgrind.out$
|
135
Makefile
Normal file
135
Makefile
Normal file
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/make -f
|
||||
|
||||
VPATH=src:tests/unit
|
||||
DESTDIR?=/
|
||||
PREFIX?=/usr/local/bin
|
||||
INSTALLDIR=$(DESTDIR)/$(PREFIX)
|
||||
|
||||
ifdef DEBUG
|
||||
CFLAGS_EXTRA=-g -DDEBUG
|
||||
LDFLAGS_EXTRA=-g
|
||||
else
|
||||
CFLAGS_EXTRA=-O2
|
||||
endif
|
||||
CFLAGS_EXTRA += -fPIC --std=gnu99
|
||||
LDFLAGS_EXTRA += -Wl,--relax,--gc-sections
|
||||
|
||||
TOOLCHAIN := $(shell $(CC) --version|awk '/Debian/ {print "debian";exit;}')
|
||||
#
|
||||
# This bit adds extra flags depending on the distro, and the
|
||||
# architecture. To make sure debian packages have the right
|
||||
# set of 'native' flags on them
|
||||
#
|
||||
ifeq ($(TOOLCHAIN),debian)
|
||||
DEBARCH := $(shell dpkg-architecture -qDEB_BUILD_ARCH)
|
||||
ifeq ($(DEBARCH),$(filter $(DEBARCH),amd64 i386))
|
||||
CFLAGS_EXTRA += -march=native
|
||||
endif
|
||||
ifeq ($(DEBARCH),armhf)
|
||||
CFLAGS_EXTRA += -march=armv7-a -mtune=cortex-a8 -mfpu=neon
|
||||
endif
|
||||
LDFLAGS_EXTRA += -L$(LIB) -Wl,-rpath,${shell readlink -f ${LIB}}
|
||||
else
|
||||
LDFLAGS_EXTRA += -L$(LIB) -Wl,-rpath-link,$(LIB)
|
||||
endif
|
||||
|
||||
|
||||
# The -Wunreachable-code warning is only implemented in clang, but it
|
||||
# doesn't break anything for gcc to see it.
|
||||
WARNINGS=-Wall \
|
||||
-Wextra \
|
||||
-Werror-implicit-function-declaration \
|
||||
-Wstrict-prototypes \
|
||||
-Wno-missing-field-initializers \
|
||||
-Wunreachable-code
|
||||
CCFLAGS=-D_GNU_SOURCE=1 $(WARNINGS) $(CFLAGS_EXTRA) $(CFLAGS)
|
||||
LLDFLAGS=-lm -lrt -lev $(LDFLAGS_EXTRA) $(LDFLAGS)
|
||||
|
||||
|
||||
CC?=gcc
|
||||
|
||||
LIBS=-lpthread
|
||||
INC=-I/usr/include/libev -Isrc/common -Isrc/server -Isrc/proxy
|
||||
COMPILE=$(CC) -MMD $(INC) -c $(CCFLAGS)
|
||||
LINK=$(CC) $(LLDFLAGS) -Isrc $(LIBS)
|
||||
|
||||
LIB=build/
|
||||
|
||||
COMMON_SRC := $(wildcard src/common/*.c)
|
||||
SERVER_SRC := $(wildcard src/server/*.c)
|
||||
PROXY_SRC := $(wildcard src/proxy/*.c)
|
||||
|
||||
COMMON_OBJ := $(COMMON_SRC:src/%.c=build/%.o)
|
||||
SERVER_OBJ := $(SERVER_SRC:src/%.c=build/%.o)
|
||||
PROXY_OBJ := $(PROXY_SRC:src/%.c=build/%.o)
|
||||
|
||||
SRCS := $(COMMON_SRC) $(SERVER_SRC) $(PROXY_SRC)
|
||||
OBJS := $(COMMON_OBJ) $(SERVER_OBJ) $(PROXY_OBJ)
|
||||
|
||||
|
||||
all: build doc
|
||||
|
||||
build: server proxy
|
||||
|
||||
build/%.o: %.c
|
||||
mkdir -p $(dir $@)
|
||||
$(COMPILE) $< -o $@
|
||||
|
||||
objs: $(OBJS)
|
||||
|
||||
build/flexnbd: $(COMMON_OBJ) $(SERVER_OBJ) build/main.o
|
||||
$(LINK) $^ -o $@
|
||||
|
||||
build/flexnbd-proxy: $(COMMON_OBJ) $(PROXY_OBJ) build/proxy-main.o
|
||||
$(LINK) $^ -o $@
|
||||
|
||||
server: build/flexnbd
|
||||
|
||||
proxy: build/flexnbd-proxy
|
||||
|
||||
CHECK_SRC := $(wildcard tests/unit/*.c)
|
||||
CHECK_OBJ := $(CHECK_SRC:tests/unit/%.c=build/%.o)
|
||||
# Why can't we reuse the build/%.o rule above? Not sure.
|
||||
|
||||
CHECK_BINS := $(CHECK_SRC:tests/unit/%.c=build/%)
|
||||
|
||||
build/check_%: build/check_%.o
|
||||
$(LINK) $^ -o $@ $(COMMON_OBJ) $(SERVER_OBJ) -lcheck
|
||||
|
||||
check_objs: $(CHECK_OBJ)
|
||||
|
||||
check_bins: $(CHECK_BINS)
|
||||
|
||||
check: $(OBJS) $(CHECK_BINS)
|
||||
r=true ; for bin in $(CHECK_BINS); do $$bin || r=false; done ; $$r
|
||||
|
||||
acceptance: build
|
||||
cd tests/acceptance && RUBYOPT='-I.' ruby nbd_scenarios -v
|
||||
|
||||
test: check acceptance
|
||||
|
||||
build/flexnbd.1: README.txt
|
||||
txt2man -t flexnbd -s 1 $< > $@
|
||||
|
||||
build/flexnbd-proxy.1: README.proxy.txt
|
||||
txt2man -t flexnbd-proxy -s 1 $< > $@
|
||||
|
||||
# If we don't pipe to file, gzip clobbers the original, causing make
|
||||
# to rebuild each time
|
||||
%.1.gz: %.1
|
||||
gzip -c -f $< > $@
|
||||
|
||||
doc: build/flexnbd.1.gz build/flexnbd-proxy.1.gz
|
||||
|
||||
install:
|
||||
mkdir -p $(INSTALLDIR)
|
||||
cp build/flexnbd build/flexnbd-proxy $(INSTALLDIR)
|
||||
|
||||
clean:
|
||||
rm -rf build/*
|
||||
|
||||
|
||||
.PHONY: clean objs check_objs all server proxy check_bins check doc build test acceptance
|
||||
|
||||
# Include extra dependencies at the end, NOT before 'all'
|
||||
-include $(wildcard build/*.d)
|
206
README.proxy.txt
Normal file
206
README.proxy.txt
Normal file
@@ -0,0 +1,206 @@
|
||||
NAME
|
||||
|
||||
flexnbd-proxy - A simple NBD proxy
|
||||
|
||||
SYNOPSIS
|
||||
|
||||
flexnbd-proxy --addr ADDR [--port PORT] --conn-addr ADDR
|
||||
--conn-port PORT [--bind ADDR] [--cache[=CACHE_BYTES]]
|
||||
[--help] [--verbose] [--quiet]
|
||||
|
||||
DESCRIPTION
|
||||
|
||||
flexnbd-proxy is a simple NBD proxy server that implements resilient
|
||||
connection logic for the client. It connects to an upstream NBD server
|
||||
and allows a single client to connect to it. All server properties are
|
||||
proxied to the client, and the client connection is kept alive across
|
||||
reconnections to the upstream server. If the upstream goes away while
|
||||
an NBD request is in-flight then the proxy (silently, from the point
|
||||
of view of the client) reconnects and retransmits the request, before
|
||||
returning the response to the client.
|
||||
|
||||
USAGE
|
||||
|
||||
Proxy requests from an NBD client to an NBD server, resiliently. Only one
|
||||
client can be connected at a time, and ACLs cannot be applied to the client, as they
|
||||
can be to clients connecting directly to a flexnbd in serve mode.
|
||||
|
||||
On starting up, the proxy will attempt to connect to the server specified by
|
||||
--conn-addr and --conn-port (from the address specified by --bind, if given). If
|
||||
it fails, then the process will die with an error exit status.
|
||||
|
||||
Assuming a successful connection to the `upstream` server is made, the proxy
|
||||
will then start listening on the address specified by --addr and --port, waiting
|
||||
for `downstream` to connect to it (this will be your NBD client). The client
|
||||
will be given the same hello message as the proxy was given by the server.
|
||||
|
||||
When connected, any request the client makes will be read by the proxy and sent
|
||||
to the server. If the server goes away for any reason, the proxy will remember
|
||||
the request and regularly (~ every 5 seconds) try to reconnect to the server.
|
||||
Upon reconnection, the request is sent and a reply is waited for. When a reply
|
||||
is received, it is sent back to the client.
|
||||
|
||||
When the client disconnects, cleanly or otherwise, the proxy goes back to
|
||||
waiting for a new client to connect. The connection to the server is maintained
|
||||
at that point, in case it is needed again.
|
||||
|
||||
Only one request may be in-flight at a time under the current architecture; that
|
||||
doesn't seem to slow things down much relative to alternative options, but may
|
||||
be changed in the future if it becomes an issue.
|
||||
|
||||
OPTIONS
|
||||
|
||||
--addr, -l ADDR
|
||||
The address to listen on. If this begins with a '/', it is assumed to be
|
||||
a UNIX domain socket to create. Otherwise, it should be an IPv4 or IPv6
|
||||
address.
|
||||
|
||||
--port, -p PORT
|
||||
The port to listen on, if --addr is not a UNIX socket.
|
||||
|
||||
--conn-addr, -C ADDR
|
||||
The address of the NBD server to connect to. Required.
|
||||
|
||||
--conn-port, -P PORT
|
||||
The port of the NBD server to connect to. Required.
|
||||
|
||||
--cache, -c=CACHE_BYTES
|
||||
If given, the size in bytes of read cache to use. CACHE_BYTES
|
||||
defaults to 4096.
|
||||
|
||||
--help, -h
|
||||
Show command or global help.
|
||||
|
||||
--verbose, -v
|
||||
Output all available log information to STDERR.
|
||||
|
||||
--quiet, -q
|
||||
Output as little log information as possible to STDERR.
|
||||
|
||||
LOGGING
|
||||
|
||||
Log output is sent to STDERR. If --quiet is set, no output will be
|
||||
seen unless the program terminates abnormally. If neither --quiet nor
|
||||
--verbose are set, no output will be seen unless something goes wrong
|
||||
with a specific request. If --verbose is given, every available log
|
||||
message will be seen (which, for a debug build, is many). It is not an
|
||||
error to set both --verbose and --quiet. The last one wins.
|
||||
|
||||
The log line format is:
|
||||
|
||||
<TIMESTAMP>:<LEVEL>:<PID> <THREAD> <SOURCEFILE>:<SOURCELINE>: <MSG>
|
||||
|
||||
<TIMESTAMP>
|
||||
Time the log entry was made. This is expressed in terms of monotonic ms.
|
||||
|
||||
<LEVEL>
|
||||
This will be one of 'D', 'I', 'W', 'E', 'F' in increasing order of
|
||||
severity. If flexnbd is started with the --quiet flag, only 'F' will
|
||||
be seen. If it is started with the --verbose flag, any from 'I'
|
||||
upwards will be seen. Only if you have a debug build and start it
|
||||
with --verbose will you see 'D' entries.
|
||||
|
||||
<PID>
|
||||
This is the process ID.
|
||||
|
||||
<THREAD>
|
||||
flexnbd-proxy is currently single-threaded, so this should be the
|
||||
same for all lines. That may not be the case in the future.
|
||||
|
||||
<SOURCEFILE:SOURCELINE>
|
||||
Identifies where in the source code this log line can be found.
|
||||
|
||||
<MSG>
|
||||
A short message describing what's happening, how it's being done, or
|
||||
if you're very lucky why it's going on.
|
||||
|
||||
EXAMPLES
|
||||
|
||||
The main point of the proxy mode is to allow clients that would otherwise break
|
||||
when the NBD server goes away (during a migration, for instance) to see a
|
||||
persistent TCP connection throughout the process, instead of needing its own
|
||||
reconnection logic.
|
||||
|
||||
For maximum reliability, the proxy process would be run on the same machine as
|
||||
the actual NBD client; an example might look like:
|
||||
|
||||
nbd-server-1$ flexnbd serve -l 10.0.0.1 -p 4777 myfile [...]
|
||||
|
||||
nbd-client-1$ flexnbd-proxy -l 127.0.0.1 -p 4777 -C 10.0.0.1 -P 4777
|
||||
nbd-client-1$ nbd-client -c 127.0.0.1 4777 /dev/nbd0
|
||||
|
||||
nbd-server-2$ flexnbd listen -l 10.0.0.2 -p 4777 -f myfile [...]
|
||||
|
||||
nbd-server-1$ flexnbd mirror --addr 10.0.0.2 -p 4777 [...]
|
||||
|
||||
Upon completing the migration, the mirroring and listening flexnbd servers will
|
||||
both exit. With the proxy mediating requests, this does not break the TCP
|
||||
connection that nbd-client is holding open. If no requests are in-flight, it
|
||||
will not notice anything at all; if requests are in-flight, then the reply may
|
||||
take longer than usual to be returned.
|
||||
|
||||
When flexnbd is restarted in serve mode on the second server:
|
||||
|
||||
nbd-server-2$ flexnbd serve -l 10.0.0.1 -p 4777 -f myfile [...]
|
||||
|
||||
The proxy notices and reconnects, fulfilling any request it has in its buffer.
|
||||
The data in myfile has been moved between physical servers without the nbd
|
||||
client process having to be disturbed at all.
|
||||
|
||||
READ CACHE
|
||||
|
||||
If the --cache option is given at the command line, either without an
|
||||
argument or with an argument greater than 0, flexnbd-proxy will use a
|
||||
read-ahead cache. The cache as currently implemented doubles each read
|
||||
request size, up to a maximum of 2xCACHE_BYTES, and retains the latter
|
||||
half in a buffer. If the next read request from the client exactly
|
||||
matches the region held in the buffer, flexnbd-proxy responds from the
|
||||
cache without making a request to the server.
|
||||
|
||||
This pattern is designed to match sequential reads, such as those
|
||||
performed by a booting virtual machine.
|
||||
|
||||
Note: If specifying a cache size, you must use this form:
|
||||
|
||||
nbd-client$ flexnbd-proxy --cache=XXXX
|
||||
|
||||
That is, the '=' is required. This is a limitation of getopt_long.
|
||||
|
||||
If no cache size is given, a size of 4096 bytes is assumed. Caching can
|
||||
be explicitly disabled by setting a size of 0.
|
||||
|
||||
BUGS
|
||||
|
||||
Should be reported via GitHub.
|
||||
|
||||
* https://github.com/BytemarkHosting/flexnbd-c/issues
|
||||
|
||||
Current issues include:
|
||||
|
||||
* only old-style NBD negotiation is supported;
|
||||
* only one request may be in-flight at a time;
|
||||
* all I/O is blocking, and signals terminate the process immediately;
|
||||
* UNIX socket support is limited to the listen address;
|
||||
* FLUSH and TRIM commands, and the FUA flag, are not supported;
|
||||
* DISCONNECT requests do not get passed through to the NBD server;
|
||||
* no active timeout-retry of requests - we trust the kernel's idea of
|
||||
failure.
|
||||
|
||||
AUTHOR
|
||||
|
||||
Originally written by Alex Young <alex@blackkettle.org>.
|
||||
Original concept and core code by Matthew Bloch <matthew@bytemark.co.uk>.
|
||||
Proxy mode written by Nick Thomas <me@ur.gs>.
|
||||
|
||||
The full commit history is available on GitHub.
|
||||
|
||||
SEE ALSO
|
||||
|
||||
flexnbd(1), nbd-client(8), xnbd-server(8), xnbd-client(8)
|
||||
|
||||
COPYRIGHT
|
||||
|
||||
Copyright (c) 2012-2016 Bytemark Hosting Ltd. Free use of this
|
||||
software is granted under the terms of the GNU General Public License
|
||||
version 3 or later.
|
||||
|
377
README.txt
377
README.txt
@@ -1,17 +1,36 @@
|
||||
FLEXNBD(1)
|
||||
==========
|
||||
:doctype: manpage
|
||||
|
||||
NAME
|
||||
----
|
||||
|
||||
flexnbd - A fast NBD server
|
||||
|
||||
SYNOPSIS
|
||||
--------
|
||||
*flexnbd* 'COMMAND' ['OPTIONS']
|
||||
|
||||
flexnbd MODE [ ARGS ]
|
||||
|
||||
flexnbd serve --addr ADDR --port PORT --file FILE [--sock SOCK]
|
||||
[--default-deny] [--killswitch] [global_option]* [acl_entry]*
|
||||
|
||||
flexnbd listen --addr ADDR --port PORT --file FILE [--sock SOCK]
|
||||
[--default-deny] [global_option]* [acl_entry]*
|
||||
|
||||
flexnbd mirror --addr ADDR --port PORT --sock SOCK [--unlink]
|
||||
[--bind BIND_ADDR] [global_option]*
|
||||
|
||||
flexnbd acl --sock SOCK [acl_entry]+ [global_option]*
|
||||
|
||||
flexnbd break --sock SOCK [global_option]*
|
||||
|
||||
flexnbd status --sock SOCK [global_option]*
|
||||
|
||||
flexnbd read --addr ADDR --port PORT --from OFFSET --size SIZE
|
||||
[--bind BIND_ADDR] [global_option]*
|
||||
|
||||
flexnbd write --addr ADDR --port PORT --from OFFSET --size SIZE
|
||||
[--bind BIND_ADDR] [global_option]*
|
||||
|
||||
flexnbd help [mode] [global_option]*
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
|
||||
Flexnbd is a fast NBD server which supports live migration. Live
|
||||
migration is performed by writing the data to a new server. A failed
|
||||
migration will be invisible to any connected clients.
|
||||
@@ -19,90 +38,85 @@ migration will be invisible to any connected clients.
|
||||
Flexnbd tries quite hard to preserve sparsity of files it is serving,
|
||||
even across migrations.
|
||||
|
||||
COMMANDS
|
||||
--------
|
||||
SERVE MODE
|
||||
|
||||
Serve a file.
|
||||
|
||||
serve
|
||||
~~~~~
|
||||
$ flexnbd serve --addr <ADDR> --port <PORT> --file <FILE>
|
||||
[--sock <SOCK>] [--default-deny] [global option]* [acl entry]*
|
||||
[--sock <SOCK>] [--default-deny] [-k] [global_option]*
|
||||
[acl_entry]*
|
||||
|
||||
Serve a file. If any ACL entries are given (which should be IP
|
||||
If any ACL entries are given (which should be IP
|
||||
addresses), only those clients listed will be permitted to connect.
|
||||
|
||||
flexnbd will continue to serve until a SIGINT, SIGQUIT, or a successful
|
||||
migration.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
OPTIONS
|
||||
|
||||
*--addr, -l ADDR*:
|
||||
--addr, -l ADDR
|
||||
The address to listen on. Required.
|
||||
|
||||
*--port, -p PORT*:
|
||||
--port, -p PORT
|
||||
The port to listen on. Required.
|
||||
|
||||
*--file, -f FILE*:
|
||||
--file, -f FILE
|
||||
The file to serve. Must already exist. Required.
|
||||
|
||||
*--sock, -s SOCK*:
|
||||
--sock, -s SOCK
|
||||
Path to a control socket to open. You will need this if you want to
|
||||
migrate, get the current status, or manipulate the access control
|
||||
list.
|
||||
|
||||
*--default-deny, -d*:
|
||||
--default-deny, -d
|
||||
How to interpret an empty ACL. If --default-deny is given, an
|
||||
empty ACL will let no clients connect. If it is not given, an
|
||||
empty ACL will let any client connect.
|
||||
|
||||
listen
|
||||
~~~~~~
|
||||
--killswitch, -k
|
||||
If set, we implement a 2-minute timeout on NBD requests and
|
||||
responses. If a request takes longer than that to complete,
|
||||
the client is disconnected. This is useful to keep broken
|
||||
clients from breaking migrations, among other things.
|
||||
|
||||
$ flexnbd listen --addr <ADDR> --port <PORT> --file <FILE>
|
||||
[--rebind-addr <REBIND-ADDR>] [--rebind-port <REBIND-PORT>]
|
||||
[--sock <SOCK>] [--default-deny] [global option]* [acl entry]*
|
||||
LISTEN MODE
|
||||
|
||||
Listen for an inbound migration, then serve it as normal once it has
|
||||
completed.
|
||||
Listen for an inbound migration, and quit with a status of 0 on
|
||||
completion.
|
||||
|
||||
flexnbd will wait for a successful migration, and then switch into
|
||||
'serve' mode. The file to write the inbound migration data to must
|
||||
already exist before you run 'flexnbd listen'.
|
||||
$ flexnbd listen --addr ADDR --port PORT --file FILE
|
||||
[--sock SOCK] [--default-deny] [global_option]*
|
||||
[acl_entry]*
|
||||
|
||||
Only one sender may connect to send data, and the server is not
|
||||
available to clients while the migration is taking place.
|
||||
flexnbd will wait for a successful migration, and then quit. The file
|
||||
to write the inbound migration data to must already exist before you
|
||||
run 'flexnbd listen'.
|
||||
|
||||
If the sender disconnects part-way through the migration, the
|
||||
destination will expect it to reconnect and retry the whole migration.
|
||||
It isn't safe to assume that a partial migration can be resumed because
|
||||
the destination has no knowledge of whether a client has made a write to
|
||||
Only one sender may connect to send data, and if the sender
|
||||
disconnects part-way through the migration, the destination will
|
||||
expect it to reconnect and retry the whole migration. It isn't safe
|
||||
to assume that a partial migration can be resumed because the
|
||||
destination has no knowledge of whether a client has made a write to
|
||||
the source in the interim.
|
||||
|
||||
To support transparently replacing an existing server, flexnbd can
|
||||
switch addresses once it has received a successful migration.
|
||||
If the migration fails for a reason which the 'flexnbd listen' process
|
||||
can't fix (say, a failed local write), it will exit with an error
|
||||
status. In this case, the sender will continually retry the migration
|
||||
until it succeeds, and you will need to restart the 'flexnbd listen'
|
||||
process to allow that to happen.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
As for 'serve', with these additions:
|
||||
OPTIONS
|
||||
|
||||
*--rebind-addr, -L REBIND_ADDR*:
|
||||
The address to rebind to once migration has completed.
|
||||
As for serve.
|
||||
|
||||
*--rebind-port, -P REBIND_PORT*:
|
||||
The port to rebind to once migration has completed.
|
||||
|
||||
Either, both, or neither of --rebind-port and --rebind-addr may be given.
|
||||
If rebinding fails, flexnbd will retry every second until it succeeds.
|
||||
|
||||
mirror
|
||||
~~~~~~
|
||||
|
||||
$ flexnbd mirror --addr <ADDR> --port <PORT> --sock SOCK
|
||||
[--bind <BIND-ADDR>] [global option]*
|
||||
MIRROR MODE
|
||||
|
||||
Start a migration from the server with control socket SOCK to the server
|
||||
listening at ADDR:PORT.
|
||||
|
||||
$ flexnbd mirror --addr ADDR --port PORT --sock SOCK [--unlink]
|
||||
[--bind BIND_ADDR] [global_option]*
|
||||
|
||||
Migration can be a slow process. Rather than block the 'flexnbd mirror'
|
||||
process until it completes, it will exit with a message of "Migration
|
||||
started" once it has confirmation that the local server was able to
|
||||
@@ -115,142 +129,160 @@ again. It is not safe to resume the migration from where it left off
|
||||
because the source can't see that the backing store behind the
|
||||
destination is intact, or even on the same machine.
|
||||
|
||||
Note: files smaller than 4096 bytes cannot be migrated.
|
||||
If the --unlink option is given, the local file will be deleted
|
||||
immediately before the mirror connection is terminated. This allows
|
||||
an otherwise-ambiguous situation to be resolved: if you don't unlink
|
||||
the file and the flexnbd process at either end is terminated, it's not
|
||||
possible to tell which copy of the data is canonical. Since the
|
||||
unlink happens as soon as the sender knows that it has transmitted all
|
||||
the data, there can be no ambiguity.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
Note: files smaller than 4096 bytes cannot be mirrored.
|
||||
|
||||
*--addr, -l ADDR*:
|
||||
OPTIONS
|
||||
|
||||
--addr, -l ADDR
|
||||
The address of the remote server to migrate to. Required.
|
||||
|
||||
*--port, -p PORT*:
|
||||
--port, -p PORT
|
||||
The port of the remote server to migrate to. Required.
|
||||
|
||||
*--sock, -s SOCK*:
|
||||
--sock, -s SOCK
|
||||
The control socket of the local server to migrate from. Required.
|
||||
|
||||
*--bind, -b BIND-ADDR*:
|
||||
The local address to bind to. You may need this if the remote server
|
||||
is using an access control list.
|
||||
--unlink, -u
|
||||
Unlink the served file from the local filesystem after
|
||||
successfully mirroring.
|
||||
|
||||
acl
|
||||
~~~
|
||||
--bind, -b BIND_ADDR
|
||||
The local address to bind to. You may need this if the remote
|
||||
server is using an access control list.
|
||||
|
||||
$ flexnbd acl --sock <SOCK> [acl entry]+ [global option]*
|
||||
BREAK MODE
|
||||
|
||||
Stop a running migration.
|
||||
|
||||
$ flexnbd break --sock SOCK [global_option]*
|
||||
|
||||
OPTIONS
|
||||
|
||||
--sock, -s SOCK
|
||||
The control socket of the local server whose migration to stop.
|
||||
Required.
|
||||
|
||||
ACL MODE
|
||||
|
||||
Set the access control list of the server with the control socket SOCK
|
||||
to the given access control list entries.
|
||||
|
||||
$ flexnbd acl --sock SOCK [acl_entry]+ [global_option]*
|
||||
|
||||
ACL entries are given as IP addresses.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
OPTIONS
|
||||
|
||||
*--sock, -s SOCK*:
|
||||
The control socket of the server whose ACL to replace.
|
||||
--sock, -s SOCK
|
||||
The control socket of the server whose ACL to replace. Required.
|
||||
|
||||
status
|
||||
~~~~~~
|
||||
|
||||
$ flexnbd status --sock <SOCK> [global option]*
|
||||
STATUS MODE
|
||||
|
||||
Get the current status of the server with control socket SOCK.
|
||||
|
||||
$ flexnbd status --sock SOCK [global_option]*
|
||||
|
||||
The status will be printed to STDOUT. It is a space-separated list of
|
||||
key=value pairs. The space character will never appear in a key or
|
||||
value. Currently reported values are:
|
||||
|
||||
*is_mirroring*:
|
||||
pid
|
||||
The process id of the server listening on SOCK.
|
||||
|
||||
is_mirroring
|
||||
'true' if this server is sending migration data, 'false' otherwise.
|
||||
|
||||
*has_control*:
|
||||
'false' if this server was started in 'listen' mode and has not yet
|
||||
received a successful migration. 'true' otherwise.
|
||||
has_control
|
||||
'false' if this server was started in 'listen' mode. 'true' otherwise.
|
||||
|
||||
read
|
||||
~~~~
|
||||
OPTIONS
|
||||
|
||||
$ flexnbd read --addr <ADDR> --port <PORT> --from <OFFSET>
|
||||
--size <SIZE> [--bind BIND-ADDR] [global option]*
|
||||
--sock, -s SOCK
|
||||
The control socket of the server of interest. Required.
|
||||
|
||||
READ MODE
|
||||
|
||||
Connect to the server at ADDR:PORT, and read SIZE bytes starting at
|
||||
OFFSET in a single NBD query. The returned data will be echoed to
|
||||
STDOUT. In case of a remote ACL, set the local source address to
|
||||
BIND-ADDR.
|
||||
OFFSET in a single NBD query.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
$ flexnbd read --addr ADDR --port PORT --from OFFSET --size SIZE
|
||||
[--bind BIND_ADDR] [global_option]*
|
||||
|
||||
*--addr, -l ADDR*:
|
||||
The returned data will be echoed to STDOUT. In case of a remote ACL,
|
||||
set the local source address to BIND_ADDR.
|
||||
|
||||
OPTIONS
|
||||
|
||||
--addr, -l ADDR
|
||||
The address of the remote server. Required.
|
||||
|
||||
*--port, -p PORT*:
|
||||
--port, -p PORT
|
||||
The port of the remote server. Required.
|
||||
|
||||
*--from, -F OFFSET*:
|
||||
--from, -F OFFSET
|
||||
The byte offset to start reading from. Required. Maximum 2^62.
|
||||
|
||||
*--size, -S SIZE*:
|
||||
--size, -S SIZE
|
||||
The number of bytes to read. Required. Maximum 2^30.
|
||||
|
||||
*--bind, -b BIND-ADDR*:
|
||||
The local address to bind to. You may need this if the remote server
|
||||
is using an access control list.
|
||||
--bind, -b BIND_ADDR
|
||||
The local address to bind to. You may need this if the remote
|
||||
server is using an access control list.
|
||||
|
||||
write
|
||||
~~~~~
|
||||
|
||||
$ cat ... | flexnbd write --addr <ADDR> --port <PORT> --from <OFFSET>
|
||||
--size <SIZE> [--bind BIND-ADDR] [global option]*
|
||||
WRITE MODE
|
||||
|
||||
Connect to the server at ADDR:PORT, and write SIZE bytes from STDIN
|
||||
starting at OFFSET in a single NBD query. In case of a remote ACL, set
|
||||
the local source address to BIND-ADDR.
|
||||
starting at OFFSET in a single NBD query.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
$ cat ... | flexnbd write --addr ADDR --port PORT --from OFFSET
|
||||
--size SIZE [--bind BIND_ADDR] [global_option]*
|
||||
|
||||
*--addr, -l ADDR*:
|
||||
In case of a remote ACL, set the local source address to BIND_ADDR.
|
||||
|
||||
OPTIONS
|
||||
|
||||
--addr, -l ADDR
|
||||
The address of the remote server. Required.
|
||||
|
||||
*--port, -p PORT*:
|
||||
--port, -p PORT
|
||||
The port of the remote server. Required.
|
||||
|
||||
*--from, -F OFFSET*:
|
||||
--from, -F OFFSET
|
||||
The byte offset to start writing from. Required. Maximum 2^62.
|
||||
|
||||
*--size, -S SIZE*:
|
||||
--size, -S SIZE
|
||||
The number of bytes to write. Required. Maximum 2^30.
|
||||
|
||||
*--bind, -b BIND-ADDR*:
|
||||
The local address to bind to. You may need this if the remote server
|
||||
is using an access control list.
|
||||
--bind, -b BIND_ADDR
|
||||
The local address to bind to. You may need this if the remote
|
||||
server is using an access control list.
|
||||
|
||||
help
|
||||
~~~~
|
||||
HELP MODE
|
||||
|
||||
$ flexnbd help [command] [global option]*
|
||||
$ flexnbd help [mode] [global_option]*
|
||||
|
||||
Without 'command', show the list of available commands. With 'command',
|
||||
show help for that command.
|
||||
Without mode, show the list of available modes. With mode, show help for that mode.
|
||||
|
||||
GLOBAL OPTIONS
|
||||
--------------
|
||||
|
||||
*--help, -h* :
|
||||
Show command or global help.
|
||||
--help, -h Show mode or global help.
|
||||
|
||||
*--verbose, -v* :
|
||||
Output all available log information to STDERR.
|
||||
|
||||
*--quiet, -q* :
|
||||
Output as little log information as possible to STDERR.
|
||||
--verbose, -v Output all available log information to STDERR.
|
||||
|
||||
--quiet, -q Output as little log information as possible to STDERR.
|
||||
|
||||
LOGGING
|
||||
-------
|
||||
Log output is sent to STDERR. If --quiet is set, no output will be seen
|
||||
unless the program terminates abnormally. If neither --quiet nor
|
||||
|
||||
Log output is sent to STDERR. If --quiet is set, no output will be
|
||||
seen unless the program terminates abnormally. If neither --quiet nor
|
||||
--verbose are set, no output will be seen unless something goes wrong
|
||||
with a specific request. If --verbose is given, every available log
|
||||
message will be seen (which, for a debug build, is many). It is not an
|
||||
@@ -258,36 +290,38 @@ error to set both --verbose and --quiet. The last one wins.
|
||||
|
||||
The log line format is:
|
||||
|
||||
<LEVEL>:<PID> <THREAD> <SOURCEFILE>:<SOURCELINE>: <MSG>
|
||||
<TIMESTAMP>:<LEVEL>:<PID> <THREAD> <SOURCEFILE:SOURCELINE>: <MSG>
|
||||
|
||||
*LEVEL*:
|
||||
<TIMESTAMP>
|
||||
Time the log entry was made. This is expressed in terms of monotonic
|
||||
ms.
|
||||
|
||||
<LEVEL>
|
||||
This will be one of 'D', 'I', 'W', 'E', 'F' in increasing order of
|
||||
severity. If flexnbd is started with the --quiet flag, only 'F' will be
|
||||
seen. If it is started with the --verbose flag, any from 'I' upwards
|
||||
will be seen. Only if you have a debug build and start it with
|
||||
--verbose will you see 'D' entries.
|
||||
severity. If flexnbd is started with the --quiet flag, only 'F'
|
||||
will be seen. If it is started with the --verbose flag, any from 'I'
|
||||
upwards will be seen. Only if you have a debug build and start it
|
||||
with --verbose will you see 'D' entries.
|
||||
|
||||
*PID*:
|
||||
<PID>
|
||||
This is the process ID.
|
||||
|
||||
*THREAD*:
|
||||
There are several pthreads per flexnbd process: a main thread, a serve
|
||||
thread, a thread per client, and possibly a pair of mirror threads and a
|
||||
control thread. This field identifies which thread was responsible for
|
||||
the log line.
|
||||
<THREAD>
|
||||
There are several pthreads per flexnbd process: a main thread, a
|
||||
serve thread, a thread per client, and possibly a pair of mirror
|
||||
threads and a control thread. This field identifies which thread was
|
||||
responsible for the log line.
|
||||
|
||||
*SOURCEFILE:SOURCELINE*:
|
||||
<SOURCEFILE:SOURCELINE>
|
||||
Identifies where in the source code this log line can be found.
|
||||
|
||||
*MSG*:
|
||||
<MSG>
|
||||
A short message describing what's happening, how it's being done, or
|
||||
if you're very lucky *why* it's going on.
|
||||
if you're very lucky why it's going on.
|
||||
|
||||
EXAMPLES
|
||||
--------
|
||||
|
||||
Serving a file
|
||||
~~~~~~~~~~~~~~
|
||||
SERVING A FILE
|
||||
|
||||
The simplest case is serving a file on the default nbd port:
|
||||
|
||||
@@ -297,26 +331,24 @@ The simplest case is serving a file on the default nbd port:
|
||||
root:x:
|
||||
$
|
||||
|
||||
Reading server status
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
READING SERVER STATUS
|
||||
|
||||
In order to read a server's status, we need it to open a control socket.
|
||||
|
||||
$ flexnbd serve --file /tmp/passwd --addr 0.0.0.0 --port 4777 \
|
||||
--sock /tmp/flexnbd.sock
|
||||
$ flexnbd status --sock /tmp/flexnbd.sock
|
||||
is_mirroring=false has_control=true
|
||||
pid=9635 is_mirroring=false has_control=true
|
||||
|
||||
$
|
||||
|
||||
Note that the status output is newline-terminated.
|
||||
|
||||
Migrating
|
||||
~~~~~~~~~
|
||||
MIGRATING
|
||||
|
||||
To migrate, we need to provide a destination file of the right size.
|
||||
|
||||
$ dd if=/dev/random of=/tmp/data bs=1M count=1
|
||||
$ dd if=/dev/urandom of=/tmp/data bs=1024 count=1K
|
||||
$ truncate -s 1M /tmp/data.copy
|
||||
$ flexnbd serve --file /tmp/data --addr 0.0.0.0 --port 4778 \
|
||||
--sock /tmp/flex-source.sock &
|
||||
@@ -328,9 +360,9 @@ Now we check the status of each server, to check that they are both in
|
||||
the right state:
|
||||
|
||||
$ flexnbd status --sock /tmp/flex-source.sock
|
||||
is_mirroring=false has_control=true
|
||||
pid=9648 is_mirroring=false has_control=true
|
||||
$ flexnbd status --sock /tmp/flex-dest.sock
|
||||
is_mirroring=false has_control=false
|
||||
pid=9651 is_mirroring=false has_control=false
|
||||
$
|
||||
|
||||
With this knowledge in hand, we can start the migration:
|
||||
@@ -338,33 +370,34 @@ With this knowledge in hand, we can start the migration:
|
||||
$ flexnbd mirror --addr 127.0.0.1 --port 4779 \
|
||||
--sock /tmp/flex-source.sock
|
||||
Migration started
|
||||
[1] + 9648 done build/flexnbd serve --addr 0.0.0.0 --port 4778
|
||||
[1] + 9648 done flexnbd serve --addr 0.0.0.0 --port 4778
|
||||
[2] + 9651 done flexnbd listen --addr 0.0.0.0 --port 4779
|
||||
$
|
||||
|
||||
Note that because the file is so small in this case, we see the source
|
||||
server quit soon after we start the migration.
|
||||
|
||||
We can check the status of the destination server, to ensure that it
|
||||
took control:
|
||||
|
||||
$ flexnbd status --sock /tmp/flex-dest.sock
|
||||
is_mirroring=false has_control=true
|
||||
server quit soon after we start the migration, and the destination
|
||||
exited at roughly the same time.
|
||||
|
||||
BUGS
|
||||
----
|
||||
|
||||
Should be reported to alex@bytemark.co.uk.
|
||||
Should be reported on GitHub at
|
||||
|
||||
* https://github.com/BytemarkHosting/flexnbd-c/issues
|
||||
|
||||
AUTHOR
|
||||
------
|
||||
|
||||
Written by Alex Young <alex@bytemark.co.uk>.
|
||||
Original concept and core code by Matthew Bloch
|
||||
<matthew@bytemark.co.uk>.
|
||||
Originally written by Alex Young <alex@blackkettle.org>.
|
||||
Original concept and core code by Matthew Bloch <matthew@bytemark.co.uk>.
|
||||
Proxy mode written by Nick Thomas <me@ur.gs>.
|
||||
|
||||
COPYING
|
||||
-------
|
||||
The full commit history is available on GitHub.
|
||||
|
||||
Copyright (c) 2012 Bytemark Hosting Ltd. Free use of this software is
|
||||
granted under the terms of the GNU General Public License version 3 or
|
||||
later.
|
||||
SEE ALSO
|
||||
|
||||
flexnbd-proxy(1), nbd-client(8), xnbd-server(8), xnbd-client(8)
|
||||
|
||||
COPYRIGHT
|
||||
|
||||
Copyright (c) 2012-2016 Bytemark Hosting Ltd. Free use of this
|
||||
software is granted under the terms of the GNU General Public License
|
||||
version 3 or later.
|
||||
|
274
Rakefile
274
Rakefile
@@ -1,274 +0,0 @@
|
||||
$: << '../rake_utils/lib'
|
||||
require 'rake_utils/debian'
|
||||
include RakeUtils::DSL
|
||||
|
||||
CC=ENV['CC'] || "gcc"
|
||||
|
||||
DEBUG = ENV.has_key?('DEBUG') &&
|
||||
%w|yes y ok 1 true t|.include?(ENV['DEBUG'])
|
||||
|
||||
ALL_SOURCES =FileList['src/*']
|
||||
SOURCES = ALL_SOURCES.select { |c| c =~ /\.c$/ }
|
||||
OBJECTS = SOURCES.pathmap( "%{^src,build}X.o" )
|
||||
TEST_SOURCES = FileList['tests/unit/*.c']
|
||||
TEST_OBJECTS = TEST_SOURCES.pathmap( "%{^tests/unit,build/tests}X.o" )
|
||||
|
||||
LIBS = %w( pthread )
|
||||
CCFLAGS = %w(
|
||||
-D_GNU_SOURCE=1
|
||||
-Wall
|
||||
-Wextra
|
||||
-Werror-implicit-function-declaration
|
||||
-Wstrict-prototypes
|
||||
-Wno-missing-field-initializers
|
||||
) + # Added -Wno-missing-field-initializers to shut GCC up over {0} struct initialisers
|
||||
[ENV['CFLAGS']]
|
||||
LDFLAGS = []
|
||||
LIBCHECK = "/usr/lib/libcheck.a"
|
||||
|
||||
TEST_MODULES = Dir["tests/unit/check_*.c"].map { |n|
|
||||
File.basename( n )[%r{check_(.+)\.c},1] }
|
||||
|
||||
if DEBUG
|
||||
LDFLAGS << ["-g"]
|
||||
CCFLAGS << ["-g -DDEBUG"]
|
||||
end
|
||||
|
||||
desc "Build the binary and man page"
|
||||
task :build => ['build/flexnbd', 'build/flexnbd.1.gz']
|
||||
task :default => :build
|
||||
|
||||
desc "Build just the binary"
|
||||
task :flexnbd => "build/flexnbd"
|
||||
|
||||
# Path of the compiled unit-test binary for test module +m+
# (e.g. "acl" -> "build/tests/check_acl").
def check(m)
  "build/tests/check_" + m.to_s
end
|
||||
|
||||
file "README.txt"
|
||||
|
||||
file "build/flexnbd.1.gz" => "README.txt" do
|
||||
FileUtils.mkdir_p( "build" )
|
||||
sh "a2x --destination-dir build --format manpage README.txt"
|
||||
sh "gzip build/flexnbd.1"
|
||||
end
|
||||
|
||||
desc "Build just the man page"
|
||||
task :man => "build/flexnbd.1.gz"
|
||||
|
||||
|
||||
namespace "test" do
|
||||
desc "Run all tests"
|
||||
task 'run' => ["unit", "scenarios"]
|
||||
|
||||
desc "Build C tests"
|
||||
task 'build' => TEST_MODULES.map { |n| check n}
|
||||
|
||||
TEST_MODULES.each do |m|
|
||||
desc "Run tests for #{m}"
|
||||
task "check_#{m}" => check(m) do
|
||||
sh check m
|
||||
end
|
||||
end
|
||||
|
||||
desc "Run C tests"
|
||||
task 'unit' => 'build' do
|
||||
TEST_MODULES.each do |n|
|
||||
ENV['EF_DISABLE_BANNER'] = '1'
|
||||
sh check n
|
||||
end
|
||||
end
|
||||
|
||||
desc "Run NBD test scenarios"
|
||||
task 'scenarios' => 'flexnbd' do
|
||||
sh "cd tests/acceptance; ruby nbd_scenarios"
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
|
||||
# Compile one C source file into an object file, creating the object's
# directory first.  Headers are looked up under src/ and the flags come
# from CCFLAGS.
def gcc_compile( target, source )
  out_dir = File.dirname( target )
  FileUtils.mkdir_p out_dir

  flags = CCFLAGS.join(' ')
  sh "#{CC} -Isrc -c #{flags} -o #{target} #{source} "
end
|
||||
|
||||
# Link +objects+ (object files and/or static archives) into +target+,
# creating the target's directory first.
#
# The -l flags are emitted AFTER the objects: the linker resolves symbols
# left to right, so a library named before the objects that need it can be
# skipped (notably when the toolchain defaults to --as-needed).
def gcc_link(target, objects)
  FileUtils.mkdir_p File.dirname( target )

  libs = LIBS.map { |l| "-l#{l}" }.join(" ")

  sh "#{CC} #{LDFLAGS.join(' ')} " +
     " -Isrc " +
     " -o #{target} " +
     objects.join(" ") +
     " " + libs
end
|
||||
|
||||
# Ask the compiler (-MM) which files +c+ depends on and return them as a
# list.  The first two words of the make-style rule ("target.o:" and the
# source file itself) are dropped, leaving just the headers.
def headers(c)
  rule  = `#{CC} -Isrc -MM #{c}`
  words = rule.gsub("\\\n", " ").split(" ")
  words[2..-1]
end
|
||||
|
||||
rule 'build/flexnbd' => OBJECTS do |t|
|
||||
gcc_link(t.name, t.sources)
|
||||
end
|
||||
|
||||
|
||||
file check("client") =>
|
||||
%w{build/tests/check_client.o
|
||||
build/self_pipe.o
|
||||
build/nbdtypes.o
|
||||
build/listen.o
|
||||
build/flexnbd.o
|
||||
build/flexthread.o
|
||||
build/control.o
|
||||
build/readwrite.o
|
||||
build/parse.o
|
||||
build/client.o
|
||||
build/serve.o
|
||||
build/acl.o
|
||||
build/ioutil.o
|
||||
build/mbox.o
|
||||
build/mirror.o
|
||||
build/status.o
|
||||
build/util.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
file check("acl") =>
|
||||
%w{build/tests/check_acl.o
|
||||
build/parse.o
|
||||
build/acl.o
|
||||
build/util.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
file check( "util" ) =>
|
||||
%w{build/tests/check_util.o
|
||||
build/util.o
|
||||
build/self_pipe.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
file check("serve") =>
|
||||
%w{build/tests/check_serve.o
|
||||
build/self_pipe.o
|
||||
build/nbdtypes.o
|
||||
build/control.o
|
||||
build/readwrite.o
|
||||
build/parse.o
|
||||
build/client.o
|
||||
build/flexthread.o
|
||||
build/serve.o
|
||||
build/flexnbd.o
|
||||
build/mirror.o
|
||||
build/status.o
|
||||
build/listen.o
|
||||
build/acl.o
|
||||
build/mbox.o
|
||||
build/ioutil.o
|
||||
build/util.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
file check("readwrite") =>
|
||||
%w{build/tests/check_readwrite.o
|
||||
build/readwrite.o
|
||||
build/client.o
|
||||
build/self_pipe.o
|
||||
build/serve.o
|
||||
build/parse.o
|
||||
build/acl.o
|
||||
build/flexthread.o
|
||||
build/control.o
|
||||
build/flexnbd.o
|
||||
build/mirror.o
|
||||
build/status.o
|
||||
build/listen.o
|
||||
build/nbdtypes.o
|
||||
build/mbox.o
|
||||
build/ioutil.o
|
||||
build/util.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
file check("listen") =>
|
||||
%w{build/tests/check_listen.o
|
||||
build/listen.o
|
||||
build/flexnbd.o
|
||||
build/status.o
|
||||
build/flexthread.o
|
||||
build/mbox.o
|
||||
build/mirror.o
|
||||
build/self_pipe.o
|
||||
build/nbdtypes.o
|
||||
build/control.o
|
||||
build/readwrite.o
|
||||
build/parse.o
|
||||
build/client.o
|
||||
build/serve.o
|
||||
build/acl.o
|
||||
build/ioutil.o
|
||||
build/util.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
file check("flexnbd") =>
|
||||
%w{build/tests/check_flexnbd.o
|
||||
build/flexnbd.o
|
||||
build/ioutil.o
|
||||
build/util.o
|
||||
build/control.o
|
||||
build/listen.o
|
||||
build/mbox.o
|
||||
build/flexthread.o
|
||||
build/status.o
|
||||
build/self_pipe.o
|
||||
build/client.o
|
||||
build/acl.o
|
||||
build/parse.o
|
||||
build/nbdtypes.o
|
||||
build/readwrite.o
|
||||
build/mirror.o
|
||||
build/serve.o} do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
file check("control") =>
|
||||
%w{build/tests/check_control.o} + OBJECTS - ["build/main.o"] do |t|
|
||||
gcc_link t.name, t.prerequisites + [LIBCHECK]
|
||||
end
|
||||
|
||||
(TEST_MODULES- %w{control flexnbd acl client serve readwrite listen util}).each do |m|
|
||||
tgt = "build/tests/check_#{m}.o"
|
||||
maybe_obj_name = "build/#{m}.o"
|
||||
# Take it out in case we're testing util.o or ioutil.o
|
||||
deps = ["build/ioutil.o", "build/util.o"] - [maybe_obj_name]
|
||||
|
||||
# Add it back in if it's something we need to compile
|
||||
deps << maybe_obj_name if OBJECTS.include?( maybe_obj_name )
|
||||
|
||||
file check( m ) => deps + [tgt] do |t|
|
||||
gcc_link(t.name, deps + [tgt, LIBCHECK])
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
OBJECTS.zip( SOURCES ).each do |o,c|
|
||||
file o => [c]+headers(c) do |t| gcc_compile( o, c ) end
|
||||
end
|
||||
|
||||
TEST_OBJECTS.zip( TEST_SOURCES ).each do |o,c|
|
||||
file o => [c] + headers(c) do |t| gcc_compile( o, c ) end
|
||||
end
|
||||
|
||||
desc "Remove all build targets, binaries and temporary files"
|
||||
task :clean do
|
||||
sh "rm -rf *~ build"
|
||||
end
|
||||
|
||||
namespace :pkg do
|
||||
deb do |t|
|
||||
t.code_files = ALL_SOURCES + ["Rakefile", "README.txt"]
|
||||
t.pkg_name = "flexnbd"
|
||||
t.generate_changelog!
|
||||
end
|
||||
end
|
||||
|
2690
debian/changelog
vendored
2690
debian/changelog
vendored
File diff suppressed because it is too large
Load Diff
21
debian/control
vendored
21
debian/control
vendored
@@ -1,14 +1,25 @@
|
||||
Source: flexnbd
|
||||
Section: unknown
|
||||
Section: web
|
||||
Priority: extra
|
||||
Maintainer: Alex Young <alex@bytemark.co.uk>
|
||||
Build-Depends: cdbs, debhelper (>= 7), ruby, rake, gcc
|
||||
Maintainer: Patrick J Cherry <patrick@bytemark.co.uk>
|
||||
Build-Depends: debhelper (>= 7.0.50), ruby, gcc, libev-dev, txt2man, check, net-tools
|
||||
Standards-Version: 3.8.1
|
||||
Homepage: http://bigv.io/
|
||||
Homepage: https://github.com/BytemarkHosting/flexnbd-c
|
||||
|
||||
Package: flexnbd
|
||||
Architecture: any
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}, libev4 | libev3
|
||||
Description: FlexNBD server
|
||||
An NBD server offering push-mirroring and intelligent sparse file handling
|
||||
|
||||
Package: flexnbd-dbg
|
||||
Architecture: any
|
||||
Section: debug
|
||||
Priority: extra
|
||||
Depends:
|
||||
flexnbd (= ${binary:Version}),
|
||||
${misc:Depends}
|
||||
Description: debugging symbols for flexnbd
|
||||
An NBD server offering push-mirroring and intelligent sparse file handling
|
||||
.
|
||||
This package contains the debugging symbols for flexnbd.
|
||||
|
3
debian/flexnbd.install
vendored
3
debian/flexnbd.install
vendored
@@ -1,2 +1,3 @@
|
||||
build/flexnbd usr/bin
|
||||
build/flexnbd.1.gz usr/share/man/man1
|
||||
build/flexnbd-proxy usr/bin
|
||||
|
||||
|
2
debian/flexnbd.manpages
vendored
Normal file
2
debian/flexnbd.manpages
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
build/flexnbd.1.gz
|
||||
build/flexnbd-proxy.1.gz
|
13
debian/rules
vendored
13
debian/rules
vendored
@@ -7,8 +7,13 @@
|
||||
%:
|
||||
dh $@
|
||||
|
||||
override_dh_auto_build:
|
||||
rake build
|
||||
override_dh_strip:
|
||||
dh_strip --dbg-package=flexnbd-dbg
|
||||
|
||||
#
|
||||
# TODO: The ruby test suites don't work during building in a chroot, so leave
|
||||
# them out for now.
|
||||
#
|
||||
#override_dh_auto_test:
|
||||
# rake test:run
|
||||
|
||||
override_dh_auto_clean:
|
||||
rake clean
|
||||
|
2
debian/source/format
vendored
2
debian/source/format
vendored
@@ -1 +1 @@
|
||||
3.0 (native)
|
||||
3.0 (quilt)
|
||||
|
194
src/bitset.h
194
src/bitset.h
@@ -1,194 +0,0 @@
|
||||
#ifndef BITSET_H
|
||||
#define BITSET_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
|
||||
|
||||
/** Return a char with only bit (''num'' modulo 8) set. */
static inline char char_with_bit_set(int num)
{
	int shift = num % 8;
	return 1 << shift;
}
|
||||
|
||||
/** Return 1 if the bit at ''idx'' in array ''b'' is set, 0 if it is not.
 *  Bit ''idx'' lives in byte ''idx''/8 of the array.
 */
static inline int bit_is_set(char* b, int idx)
{
	char byte = b[idx/8];
	char mask = char_with_bit_set(idx);

	return (byte & mask) ? 1 : 0;
}
|
||||
/** Return 1 if the bit at ''idx'' in array ''b'' is clear, 0 if it is set.
 *  This is simply the negation of bit_is_set().
 */
static inline int bit_is_clear(char* b, int idx)
{
	return bit_is_set(b, idx) == 0;
}
|
||||
/** Tests whether the bit at ''idx'' in array ''b'' has value ''value''.
 *  Any non-zero ''value'' means "set"; zero means "clear".
 */
static inline int bit_has_value(char* b, int idx, int value)
{
	return value ? bit_is_set(b, idx) : bit_is_clear(b, idx);
}
|
||||
/** Sets the bit ''idx'' in array ''b''.
 *  This is a plain read-modify-write of the containing byte, not an atomic
 *  operation (a __sync_fetch_and_or() variant was considered but is not
 *  used).
 */
static inline void bit_set(char* b, int idx)
{
	char *byte = &b[idx/8];

	*byte = *byte | char_with_bit_set(idx);
}
|
||||
/** Clears the bit ''idx'' in array ''b''.
 *  This is a plain read-modify-write of the containing byte, not an atomic
 *  operation (a __sync_fetch_and_nand() variant was considered but is not
 *  used).
 */
static inline void bit_clear(char* b, int idx)
{
	char *byte = &b[idx/8];

	*byte = *byte & ~char_with_bit_set(idx);
}
|
||||
/** Sets ''len'' bits in array ''b'' starting at offset ''from''.
 *
 *  Works in three phases: single bits up to the next byte boundary, whole
 *  bytes via memset(), then any remaining single bits.  A non-positive
 *  ''len'' sets nothing.
 */
static inline void bit_set_range(char* b, int from, int len) {
	/* Leading bits, until ''from'' is byte-aligned. */
	for (; from%8 != 0 && len > 0; len--) { bit_set(b, from++); }

	/* Whole bytes.  Advance past them afterwards so the trailing loop
	 * only deals with the left-over (< 8) bits instead of redoing the
	 * whole span one bit at a time.
	 */
	if (len >= 8) {
		int whole_bytes = len/8;
		memset(b+(from/8), 255, whole_bytes);
		from += whole_bytes*8;
		len  -= whole_bytes*8;
	}

	/* Trailing bits. */
	for (; len > 0; len--) { bit_set(b, from++); }
}
|
||||
/** Clears ''len'' bits in array ''b'' starting at offset ''from''.
 *
 *  Works in three phases: single bits up to the next byte boundary, whole
 *  bytes via memset(), then any remaining single bits.  A non-positive
 *  ''len'' clears nothing.
 */
static inline void bit_clear_range(char* b, int from, int len) {
	/* Leading bits, until ''from'' is byte-aligned. */
	for (; from%8 != 0 && len > 0; len--) { bit_clear(b, from++); }

	/* Whole bytes.  Advance past them afterwards so the trailing loop
	 * only deals with the left-over (< 8) bits instead of redoing the
	 * whole span one bit at a time.
	 */
	if (len >= 8) {
		int whole_bytes = len/8;
		memset(b+(from/8), 0, whole_bytes);
		from += whole_bytes*8;
		len  -= whole_bytes*8;
	}

	/* Trailing bits. */
	for (; len > 0; len--) { bit_clear(b, from++); }
}
|
||||
|
||||
/** Counts the number of contiguous bits in array ''b'', starting at ''from''
 *  up to a maximum number of bits ''len''. Returns the number of contiguous
 *  bits that are the same as the first one specified.
 *
 *  The bit at ''from'' is always read, even when ''len'' is not positive
 *  (in which case the result is 0).
 *
 *  TODO: this walks one bit at a time.  A 64-bits-at-a-time fast path was
 *  sketched previously but was never debugged, so it is not used.
 */
static inline int bit_run_count(char* b, int from, int len) {
	int wanted = bit_is_set(b, from);
	int run = 0;

	while (run < len && bit_has_value(b, from+run, wanted)) {
		run++;
	}

	return run;
}
|
||||
|
||||
/** An application of a bitset - a bitset mapping represents a file of ''size''
 *  broken down into ''resolution''-sized chunks. The bit set is assumed to
 *  represent one bit per chunk.
 */
struct bitset_mapping {
	/* size of the mapped file */
	uint64_t size;
	/* size of one chunk; each chunk is tracked by one bit */
	int      resolution;
	/* the bits themselves, allocated together with the struct */
	char     bits[];
};
|
||||
|
||||
/** Allocate a bitset_mapping for a file of the given size, and chunks of the
|
||||
* given resolution.
|
||||
*/
|
||||
static inline struct bitset_mapping* bitset_alloc(
|
||||
uint64_t size,
|
||||
int resolution
|
||||
)
|
||||
{
|
||||
struct bitset_mapping *bitset = xmalloc(
|
||||
sizeof(struct bitset_mapping)+
|
||||
(size+resolution-1)/resolution
|
||||
);
|
||||
bitset->size = size;
|
||||
bitset->resolution = resolution;
|
||||
return bitset;
|
||||
}
|
||||
|
||||
/** Declares three ints in the enclosing scope, derived from the local
 *  variables ''set'', ''from'' and ''len'':
 *    first  - index of the chunk containing byte ''from''
 *    last   - index of the chunk containing byte ''from''+''len''-1
 *    bitlen - number of chunks from first to last, inclusive
 *  Use as a statement: INT_FIRST_AND_LAST;
 */
#define INT_FIRST_AND_LAST \
	int first  = from/set->resolution; \
	int last   = (from+len-1)/set->resolution; \
	int bitlen = last-first+1
|
||||
|
||||
/** Set the bits in a bitset which correspond to the given bytes in the larger
|
||||
* file.
|
||||
*/
|
||||
static inline void bitset_set_range(
|
||||
struct bitset_mapping* set,
|
||||
uint64_t from,
|
||||
uint64_t len)
|
||||
{
|
||||
INT_FIRST_AND_LAST;
|
||||
bit_set_range(set->bits, first, bitlen);
|
||||
}
|
||||
|
||||
|
||||
/** Set every bit in the bitset. */
|
||||
static inline void bitset_set(
|
||||
struct bitset_mapping* set
|
||||
)
|
||||
{
|
||||
bitset_set_range(set, 0, set->size);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Clear the bits in a bitset which correspond to the given bytes in the
|
||||
* larger file.
|
||||
*/
|
||||
static inline void bitset_clear_range(
|
||||
struct bitset_mapping* set,
|
||||
uint64_t from,
|
||||
uint64_t len)
|
||||
{
|
||||
INT_FIRST_AND_LAST;
|
||||
bit_clear_range(set->bits, first, bitlen);
|
||||
}
|
||||
|
||||
|
||||
/** Clear every bit in the bitset. */
|
||||
static inline void bitset_clear(
|
||||
struct bitset_mapping *set
|
||||
)
|
||||
{
|
||||
bitset_clear_range(set, 0, set->size);
|
||||
}
|
||||
|
||||
|
||||
/** Counts the number of contiguous bytes that are represented as a run in
|
||||
* the bit field.
|
||||
*/
|
||||
static inline int bitset_run_count(
|
||||
struct bitset_mapping* set,
|
||||
uint64_t from,
|
||||
uint64_t len)
|
||||
{
|
||||
/* now fix in case len goes past the end of the memory we have
|
||||
* control of */
|
||||
len = len+from>set->size ? set->size-from : len;
|
||||
INT_FIRST_AND_LAST;
|
||||
return (bit_run_count(set->bits, first, bitlen) * set->resolution) -
|
||||
(from % set->resolution);
|
||||
}
|
||||
|
||||
/** Tests whether the bit field is clear for the given file offset.
|
||||
*/
|
||||
static inline int bitset_is_clear_at(
|
||||
struct bitset_mapping* set,
|
||||
uint64_t at
|
||||
)
|
||||
{
|
||||
return bit_is_clear(set->bits, at/set->resolution);
|
||||
}
|
||||
|
||||
/** Tests whether the bit field is set for the given file offset.
|
||||
*/
|
||||
static inline int bitset_is_set_at(
|
||||
struct bitset_mapping* set,
|
||||
uint64_t at
|
||||
)
|
||||
{
|
||||
return bit_is_set(set->bits, at/set->resolution);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
351
src/common/ioutil.c
Normal file
351
src/common/ioutil.c
Normal file
@@ -0,0 +1,351 @@
|
||||
#include <sys/mman.h>
|
||||
#include <sys/sendfile.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/types.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/fiemap.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "bitset.h"
|
||||
#include "ioutil.h"
|
||||
|
||||
|
||||
int build_allocation_map(struct bitset * allocation_map, int fd)
|
||||
{
|
||||
/* break blocking ioctls down */
|
||||
const unsigned long max_length = 100*1024*1024;
|
||||
const unsigned int max_extents = 1000;
|
||||
|
||||
unsigned long offset = 0;
|
||||
|
||||
struct {
|
||||
struct fiemap fiemap;
|
||||
struct fiemap_extent extents[max_extents];
|
||||
} fiemap_static;
|
||||
struct fiemap* fiemap = (struct fiemap*) &fiemap_static;
|
||||
|
||||
memset(&fiemap_static, 0, sizeof(fiemap_static));
|
||||
|
||||
for (offset = 0; offset < allocation_map->size; ) {
|
||||
|
||||
fiemap->fm_start = offset;
|
||||
|
||||
fiemap->fm_length = max_length;
|
||||
if ( offset + max_length > allocation_map->size ) {
|
||||
fiemap->fm_length = allocation_map->size-offset;
|
||||
}
|
||||
|
||||
fiemap->fm_flags = FIEMAP_FLAG_SYNC;
|
||||
fiemap->fm_extent_count = max_extents;
|
||||
fiemap->fm_mapped_extents = 0;
|
||||
|
||||
if ( ioctl( fd, FS_IOC_FIEMAP, fiemap ) < 0 ) {
|
||||
debug( "Couldn't get fiemap, returning no allocation_map" );
|
||||
return 0; /* it's up to the caller to free the map */
|
||||
}
|
||||
else {
|
||||
for ( unsigned int i = 0; i < fiemap->fm_mapped_extents; i++ ) {
|
||||
bitset_set_range( allocation_map,
|
||||
fiemap->fm_extents[i].fe_logical,
|
||||
fiemap->fm_extents[i].fe_length );
|
||||
}
|
||||
|
||||
|
||||
/* must move the offset on, but careful not to jump max_length
|
||||
* if we've actually hit max_offsets.
|
||||
*/
|
||||
if (fiemap->fm_mapped_extents > 0) {
|
||||
struct fiemap_extent *last = &fiemap->fm_extents[
|
||||
fiemap->fm_mapped_extents-1
|
||||
];
|
||||
offset = last->fe_logical + last->fe_length;
|
||||
}
|
||||
else {
|
||||
offset += fiemap->fm_length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info("Successfully built allocation map");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
int open_and_mmap(const char* filename, int* out_fd, uint64_t *out_size, void **out_map)
|
||||
{
|
||||
/*
|
||||
* size and out_size are intentionally of different types.
|
||||
* lseek64() uses off64_t to signal errors in the sign bit.
|
||||
* Since we check for these errors before trying to assign to
|
||||
* *out_size, we know *out_size can never go negative.
|
||||
*/
|
||||
off64_t size;
|
||||
|
||||
/* O_DIRECT should not be used with mmap() */
|
||||
*out_fd = open(filename, O_RDWR | O_SYNC );
|
||||
|
||||
if (*out_fd < 1) {
|
||||
warn("open(%s) failed: does it exist?", filename);
|
||||
return *out_fd;
|
||||
}
|
||||
|
||||
size = lseek64(*out_fd, 0, SEEK_END);
|
||||
if (size < 0) {
|
||||
warn("lseek64() failed");
|
||||
return size;
|
||||
}
|
||||
if (out_size) {
|
||||
*out_size = size;
|
||||
}
|
||||
|
||||
if (out_map) {
|
||||
*out_map = mmap64(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED,
|
||||
*out_fd, 0);
|
||||
if (((long) *out_map) == -1) {
|
||||
warn("mmap64() failed");
|
||||
return -1;
|
||||
}
|
||||
debug("opened %s size %ld on fd %d @ %p", filename, size, *out_fd, *out_map);
|
||||
}
|
||||
else {
|
||||
debug("opened %s size %ld on fd %d", filename, size, *out_fd);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Keep calling write() on ''filedes'' until all ''size'' bytes starting at
 * ''buffer'' have been written.
 *
 * EINTR, EAGAIN and EWOULDBLOCK are retried immediately (a busy-wait on a
 * non-blocking descriptor). Any other write() error ends the loop.
 *
 * Returns 0 when everything was written, -1 on failure.
 */
int writeloop(int filedes, const void *buffer, size_t size)
{
	const char *cursor = buffer;
	size_t remaining = size;

	while (remaining > 0) {
		ssize_t result = write(filedes, cursor, remaining);

		if (result == -1) {
			if ( errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK ) {
				return -1; // failure
			}
			continue; // busy-wait
		}

		cursor += result;
		remaining -= result;
	}

	return 0;
}
|
||||
|
||||
/* Keep calling read() on ''filedes'' until exactly ''size'' bytes have been
 * stored in ''buffer''.
 *
 * EINTR, EAGAIN and EWOULDBLOCK are retried immediately (a busy-wait on a
 * non-blocking descriptor). End-of-file before ''size'' bytes have arrived
 * counts as a failure and is logged.
 *
 * Returns 0 when the buffer was filled, -1 on EOF or any other read() error.
 */
int readloop(int filedes, void *buffer, size_t size)
{
	char *dest = buffer;
	size_t readden=0;

	while (readden < size) {
		ssize_t result = read(filedes, dest+readden, size-readden);

		if ( result == 0 /* EOF */ ) {
			/* readden is a size_t, so it needs %zu rather than %i */
			warn( "end-of-file detected while reading after %zu bytes", readden );
			return -1;
		}

		if ( result == -1 ) {
			if ( errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK ) {
				continue; // busy-wait
			}
			return -1; // failure
		}
		readden += result;
	}
	return 0;
}
|
||||
|
||||
/* Repeat sendfile64() from ''in_fd'' to ''out_fd'' until ''count'' bytes have
 * been transferred. ''offset'' is handed straight to sendfile64() on every
 * call.
 *
 * Returns 0 once ''count'' bytes were sent. Returns -1 if sendfile64() fails,
 * or if it reports 0 bytes transferred before ''count'' is reached: no
 * progress is possible at that point, and looping on would never terminate.
 */
int sendfileloop(int out_fd, int in_fd, off64_t *offset, size_t count)
{
	size_t sent=0;
	while (sent < count) {
		ssize_t result = sendfile64(out_fd, in_fd, offset, count-sent);
		debug("sendfile64(out_fd=%d, in_fd=%d, offset=%p, count-sent=%zu) = %zd", out_fd, in_fd, (void*) offset, count-sent, result);

		if (result == -1) {
			debug( "%s (%i) calling sendfile64()", strerror(errno), errno );
			return -1;
		}
		if (result == 0) {
			/* Input exhausted before count bytes were sent */
			debug( "sendfile64() made no progress after %zu of %zu bytes", sent, count );
			return -1;
		}
		sent += result;
		debug("sent=%zu, count=%zu", sent, count);
	}
	debug("exiting sendfileloop");
	return 0;
}
|
||||
|
||||
#include <errno.h>
|
||||
/* Repeat splice() from ''fd_in'' to ''fd_out'' until ''len'' bytes have been
 * moved. SPLICE_F_MORE and SPLICE_F_MOVE are always set; ''flags2'' is OR-ed
 * on top.
 *
 * Returns the number of bytes spliced, which is less than ''len'' when
 *   - SPLICE_F_NONBLOCK was requested and splice() reported EAGAIN, or
 *   - splice() returned 0 (nothing more to read from fd_in).
 * Returns -1 on any other splice() error.
 */
ssize_t spliceloop(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags2)
{
	const unsigned int flags = SPLICE_F_MORE|SPLICE_F_MOVE|flags2;
	size_t spliced=0;

	while (spliced < len) {
		/* Only ask for what is still outstanding; asking for the full
		 * len again could move more data than the caller wanted, or
		 * block waiting for bytes that will never arrive.
		 */
		ssize_t result = splice(fd_in, off_in, fd_out, off_out, len - spliced, flags);

		if (result < 0) {
			if (errno == EAGAIN && (flags & SPLICE_F_NONBLOCK) ) {
				return spliced;
			}
			return -1;
		}

		if (result == 0) {
			/* End of input: stop rather than spin forever */
			break;
		}

		spliced += result;
	}

	return spliced;
}
|
||||
|
||||
int splice_via_pipe_loop(int fd_in, int fd_out, size_t len)
|
||||
{
|
||||
|
||||
int pipefd[2]; /* read end, write end */
|
||||
size_t spliced=0;
|
||||
|
||||
if (pipe(pipefd) == -1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
while (spliced < len) {
|
||||
ssize_t run = len-spliced;
|
||||
ssize_t s2, s1 = spliceloop(fd_in, NULL, pipefd[1], NULL, run, SPLICE_F_NONBLOCK);
|
||||
/*if (run > 65535)
|
||||
run = 65535;*/
|
||||
if (s1 < 0) { break; }
|
||||
|
||||
s2 = spliceloop(pipefd[0], NULL, fd_out, NULL, s1, 0);
|
||||
if (s2 < 0) { break; }
|
||||
spliced += s2;
|
||||
}
|
||||
close(pipefd[0]);
|
||||
close(pipefd[1]);
|
||||
|
||||
return spliced < len ? -1 : 0;
|
||||
}
|
||||
|
||||
/* Reads ''fd'' one byte at a time into ''buf'' until a newline is seen.
 *
 * On a newline, the newline itself is overwritten with a terminating NUL and
 * the function returns the line length plus one (so an empty line yields 1).
 * If read() reports EOF or an error before a newline arrives, returns -1;
 * whatever was read so far is left in buf without a terminator.
 * If ''bufsize'' bytes are read without meeting a newline, returns
 * bufsize+1 and buf is NOT NUL-terminated by this function.
 */
int read_until_newline(int fd, char* buf, int bufsize)
{
	int cur = 0;

	while (cur < bufsize) {
		char *slot = buf + cur;
		int result = read(fd, slot, 1);

		if (result <= 0) {
			return -1;
		}

		if (*slot == '\n') {
			*slot = '\0';
			break;
		}

		cur++;
	}

	return cur+1;
}
|
||||
|
||||
/* Collect newline-terminated lines from ''fd'' until a blank line, EOF or a
 * read error is met.
 *
 * *lines is reset to NULL and then grown with xrealloc(); each stored entry
 * is a strdup() copy of one line. Returns the number of lines stored. EOF
 * and read errors are not distinguished from a blank line.
 *
 * NOTE(review): a line whose first byte is NUL is copied into the array but
 * not counted in the return value - confirm whether callers free it.
 */
int read_lines_until_blankline(int fd, int max_line_length, char ***lines)
{
	int lines_count = 0;
	char line[max_line_length+1];

	*lines = NULL;
	memset(line, 0, max_line_length+1);

	for (;;) {
		char *copy;
		/* 1 means an empty line; -1 means EOF or a read error */
		int readden = read_until_newline(fd, line, max_line_length);

		if (readden <= 1) {
			break;
		}

		*lines = xrealloc(*lines, (lines_count+1) * sizeof(char*));
		copy = strdup(line);
		(*lines)[lines_count] = copy;

		if (copy[0] == 0) {
			break;
		}

		lines_count++;
	}

	return lines_count;
}
|
||||
|
||||
|
||||
/* Report whether ''fd_in'' is closed, judged by fcntl(F_GETFL) failing on it.
 * Returns 1 if the query fails, 0 if it succeeds. errno is restored to its
 * value on entry, so the probe has no visible effect on it.
 */
int fd_is_closed( int fd_in )
{
	const int saved_errno = errno;
	int closed = 0;

	if ( fcntl( fd_in, F_GETFL ) < 0 ) {
		closed = 1;
	}

	errno = saved_errno;
	return closed;
}
|
||||
|
||||
|
||||
/* Returns 1 if the current errno is anything other than EAGAIN, EWOULDBLOCK
 * or EINTR (i.e. retrying the call is not expected to help), 0 otherwise.
 */
static inline int io_errno_permanent(void)
{
	const int err = errno;
	const int transient = ( err == EAGAIN || err == EWOULDBLOCK || err == EINTR );

	return !transient;
}
|
||||
|
||||
|
||||
/* Returns -1 if the operation failed, or the number of bytes read if all is
|
||||
* well. Note that 0 bytes may be returned. Unlike read(), this is not an EOF! */
|
||||
ssize_t iobuf_read(int fd, struct iobuf *iobuf, size_t default_size )
|
||||
{
|
||||
size_t left;
|
||||
ssize_t count;
|
||||
|
||||
if ( iobuf->needle == 0 ) {
|
||||
iobuf->size = default_size;
|
||||
}
|
||||
|
||||
left = iobuf->size - iobuf->needle;
|
||||
debug( "Reading %"PRIu32" of %"PRIu32" bytes from fd %i", left, iobuf->size, fd );
|
||||
|
||||
count = read( fd, iobuf->buf + iobuf->needle, left );
|
||||
|
||||
if ( count > 0 ) {
|
||||
iobuf->needle += count;
|
||||
debug( "read() returned %"PRIu32" bytes", count );
|
||||
} else if ( count == 0 ) {
|
||||
warn( "read() returned EOF on fd %i", fd );
|
||||
errno = 0;
|
||||
return -1;
|
||||
} else if ( count == -1 ) {
|
||||
if ( io_errno_permanent() ) {
|
||||
warn( SHOW_ERRNO( "read() failed on fd %i", fd ) );
|
||||
} else {
|
||||
debug( SHOW_ERRNO( "read() returned 0 bytes" ) );
|
||||
count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
ssize_t iobuf_write( int fd, struct iobuf *iobuf )
|
||||
{
|
||||
size_t left = iobuf->size - iobuf->needle;
|
||||
ssize_t count;
|
||||
|
||||
debug( "Writing %"PRIu32" of %"PRIu32" bytes to fd %i", left, iobuf->size, fd );
|
||||
count = write( fd, iobuf->buf + iobuf->needle, left );
|
||||
|
||||
if ( count >= 0 ) {
|
||||
iobuf->needle += count;
|
||||
debug( "write() returned %"PRIu32" bytes", count );
|
||||
} else {
|
||||
if ( io_errno_permanent() ) {
|
||||
warn( SHOW_ERRNO( "write() failed on fd %i", fd ) );
|
||||
} else {
|
||||
debug( SHOW_ERRNO( "write() returned 0 bytes" ) );
|
||||
count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
@@ -1,16 +1,26 @@
|
||||
#ifndef __IOUTIL_H
|
||||
#define __IOUTIL_H
|
||||
|
||||
#include "serve.h"
|
||||
struct bitset_mapping; /* don't need whole of bitset.h here */
|
||||
#include <sys/types.h>
|
||||
struct iobuf {
|
||||
unsigned char *buf;
|
||||
size_t size;
|
||||
size_t needle;
|
||||
};
|
||||
|
||||
/** Returns a bit field representing which blocks are allocated in file
|
||||
* descriptor ''fd''. You must supply the size, and the resolution at which
|
||||
* you want the bits to represent allocated blocks. If the OS represents
|
||||
* allocated blocks at a finer resolution than you've asked for, any block
|
||||
* or part block will count as "allocated" with the corresponding bit set.
|
||||
ssize_t iobuf_read( int fd, struct iobuf* iobuf, size_t default_size );
|
||||
ssize_t iobuf_write( int fd, struct iobuf* iobuf );
|
||||
|
||||
#include "serve.h"
|
||||
struct bitset; /* don't need whole of bitset.h here */
|
||||
|
||||
/** Scan the file opened in ''fd'', set bits in ''allocation_map'' that
|
||||
* correspond to which blocks are physically allocated on disc (or part-
|
||||
* allocated). If the OS represents allocated blocks at a finer resolution
|
||||
* than you've asked for, any block or part block will count as "allocated"
|
||||
* with the corresponding bit set. Returns 1 if successful, 0 otherwise.
|
||||
*/
|
||||
struct bitset_mapping* build_allocation_map(int fd, off64_t size, int resolution);
|
||||
int build_allocation_map(struct bitset * allocation_map, int fd);
|
||||
|
||||
/** Repeat a write() operation that succeeds partially until ''size'' bytes
|
||||
* are written, or an error is returned, when it returns -1 as usual.
|
||||
@@ -55,7 +65,7 @@ int read_lines_until_blankline(int fd, int max_line_length, char ***lines);
|
||||
* ''out_size'' and the address of the mmap in ''out_map''. If anything goes
|
||||
* wrong, returns -1 setting errno, otherwise 0.
|
||||
*/
|
||||
int open_and_mmap( const char* filename, int* out_fd, off64_t *out_size, void **out_map);
|
||||
int open_and_mmap( const char* filename, int* out_fd, uint64_t* out_size, void **out_map);
|
||||
|
||||
|
||||
/** Check to see whether the given file descriptor is closed.
|
@@ -7,20 +7,25 @@ void mode(char* mode, int argc, char **argv);
|
||||
|
||||
#include <getopt.h>
|
||||
|
||||
#define GETOPT_ARG(x,s) {(x), 1, 0, (s)}
|
||||
#define GETOPT_FLAG(x,v) {(x), 0, 0, (v)}
|
||||
#define GETOPT_ARG(x,s) {(x), required_argument, 0, (s)}
|
||||
#define GETOPT_FLAG(x,v) {(x), no_argument, 0, (v)}
|
||||
#define GETOPT_OPTARG(x,s) {(x), optional_argument, 0, (s)}
|
||||
|
||||
#define OPT_HELP "help"
|
||||
#define OPT_ADDR "addr"
|
||||
#define OPT_REBIND_ADDR "rebind-addr"
|
||||
#define OPT_BIND "bind"
|
||||
#define OPT_PORT "port"
|
||||
#define OPT_REBIND_PORT "rebind-port"
|
||||
#define OPT_FILE "file"
|
||||
#define OPT_SOCK "sock"
|
||||
#define OPT_FROM "from"
|
||||
#define OPT_SIZE "size"
|
||||
#define OPT_DENY "default-deny"
|
||||
#define OPT_CACHE "cache"
|
||||
#define OPT_UNLINK "unlink"
|
||||
#define OPT_CONNECT_ADDR "conn-addr"
|
||||
#define OPT_CONNECT_PORT "conn-port"
|
||||
#define OPT_KILLSWITCH "killswitch"
|
||||
#define OPT_MAX_SPEED "max-speed"
|
||||
|
||||
#define CMD_SERVE "serve"
|
||||
#define CMD_LISTEN "listen"
|
||||
@@ -28,9 +33,11 @@ void mode(char* mode, int argc, char **argv);
|
||||
#define CMD_WRITE "write"
|
||||
#define CMD_ACL "acl"
|
||||
#define CMD_MIRROR "mirror"
|
||||
#define CMD_MIRROR_SPEED "mirror-speed"
|
||||
#define CMD_BREAK "break"
|
||||
#define CMD_STATUS "status"
|
||||
#define CMD_HELP "help"
|
||||
#define LEN_CMD_MAX 7
|
||||
#define LEN_CMD_MAX 13
|
||||
|
||||
#define PATH_LEN_MAX 1024
|
||||
#define ADDR_LEN_MAX 64
|
||||
@@ -40,16 +47,19 @@ void mode(char* mode, int argc, char **argv);
|
||||
|
||||
#define GETOPT_HELP GETOPT_FLAG( OPT_HELP, 'h' )
|
||||
#define GETOPT_DENY GETOPT_FLAG( OPT_DENY, 'd' )
|
||||
|
||||
#define GETOPT_ADDR GETOPT_ARG( OPT_ADDR, 'l' )
|
||||
#define GETOPT_REBIND_ADDR GETOPT_ARG( OPT_REBIND_ADDR, 'L')
|
||||
#define GETOPT_PORT GETOPT_ARG( OPT_PORT, 'p' )
|
||||
#define GETOPT_REBIND_PORT GETOPT_ARG( OPT_REBIND_PORT, 'P')
|
||||
#define GETOPT_FILE GETOPT_ARG( OPT_FILE, 'f' )
|
||||
#define GETOPT_SOCK GETOPT_ARG( OPT_SOCK, 's' )
|
||||
#define GETOPT_FROM GETOPT_ARG( OPT_FROM, 'F' )
|
||||
#define GETOPT_SIZE GETOPT_ARG( OPT_SIZE, 'S' )
|
||||
#define GETOPT_BIND GETOPT_ARG( OPT_BIND, 'b' )
|
||||
#define GETOPT_CACHE GETOPT_OPTARG( OPT_CACHE, 'c' )
|
||||
#define GETOPT_UNLINK GETOPT_ARG( OPT_UNLINK, 'u' )
|
||||
#define GETOPT_CONNECT_ADDR GETOPT_ARG( OPT_CONNECT_ADDR, 'C' )
|
||||
#define GETOPT_CONNECT_PORT GETOPT_ARG( OPT_CONNECT_PORT, 'P' )
|
||||
#define GETOPT_KILLSWITCH GETOPT_ARG( OPT_KILLSWITCH, 'k' )
|
||||
#define GETOPT_MAX_SPEED GETOPT_ARG( OPT_MAX_SPEED, 'm' )
|
||||
|
||||
#define OPT_VERBOSE "verbose"
|
||||
#define SOPT_VERBOSE "v"
|
||||
@@ -63,6 +73,8 @@ void mode(char* mode, int argc, char **argv);
|
||||
# define VERBOSE_LOG_LEVEL 1
|
||||
#endif
|
||||
|
||||
#define QUIET_LOG_LEVEL 4
|
||||
|
||||
#define OPT_QUIET "quiet"
|
||||
#define SOPT_QUIET "q"
|
||||
#define GETOPT_QUIET GETOPT_FLAG( OPT_QUIET, 'q' )
|
||||
@@ -76,8 +88,10 @@ void mode(char* mode, int argc, char **argv);
|
||||
"\t--" OPT_SOCK ",-s <SOCK>\tPath to the control socket.\n"
|
||||
#define BIND_LINE \
|
||||
"\t--" OPT_BIND ",-b <BIND-ADDR>\tBind the local socket to a particular IP address.\n"
|
||||
|
||||
#define MAX_SPEED_LINE \
|
||||
"\t--" OPT_MAX_SPEED ",-m <bps>\tMaximum speed of the migration, in bytes/sec.\n"
|
||||
|
||||
char * help_help_text;
|
||||
|
||||
#endif
|
||||
|
@@ -27,7 +27,7 @@ void nbd_r2h_request( struct nbd_request_raw *from, struct nbd_request * to )
|
||||
{
|
||||
to->magic = htobe32( from->magic );
|
||||
to->type = htobe32( from->type );
|
||||
memcpy( to->handle, from->handle, 8 );
|
||||
to->handle.w = from->handle.w;
|
||||
to->from = htobe64( from->from );
|
||||
to->len = htobe32( from->len );
|
||||
}
|
||||
@@ -36,7 +36,7 @@ void nbd_h2r_request( struct nbd_request * from, struct nbd_request_raw * to )
|
||||
{
|
||||
to->magic = be32toh( from->magic );
|
||||
to->type = be32toh( from->type );
|
||||
memcpy( to->handle, from->handle, 8 );
|
||||
to->handle.w = from->handle.w;
|
||||
to->from = be64toh( from->from );
|
||||
to->len = be32toh( from->len );
|
||||
}
|
||||
@@ -46,12 +46,13 @@ void nbd_r2h_reply( struct nbd_reply_raw * from, struct nbd_reply * to )
|
||||
{
|
||||
to->magic = htobe32( from->magic );
|
||||
to->error = htobe32( from->error );
|
||||
memcpy( to->handle, from->handle, 8 );
|
||||
to->handle.w = from->handle.w;
|
||||
}
|
||||
|
||||
void nbd_h2r_reply( struct nbd_reply * from, struct nbd_reply_raw * to )
|
||||
{
|
||||
to->magic = be32toh( from->magic );
|
||||
to->error = be32toh( from->error );
|
||||
memcpy( to->handle, from->handle, 8 );
|
||||
to->handle.w = from->handle.w;
|
||||
}
|
||||
|
@@ -10,11 +10,25 @@
|
||||
#define REQUEST_READ 0
|
||||
#define REQUEST_WRITE 1
|
||||
#define REQUEST_DISCONNECT 2
|
||||
#define REQUEST_ENTRUST (1<<16)
|
||||
|
||||
/* The top 2 bytes of the type field are overloaded and can contain flags */
|
||||
#define REQUEST_MASK 0x0000ffff
|
||||
|
||||
|
||||
/* 1MiB is the de-facto standard for maximum size of header + data */
|
||||
#define NBD_MAX_SIZE ( 1024 * 1024 )
|
||||
|
||||
#define NBD_REQUEST_SIZE ( sizeof( struct nbd_request_raw ) )
|
||||
#define NBD_REPLY_SIZE ( sizeof( struct nbd_reply_raw ) )
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
typedef union nbd_handle_t {
|
||||
uint8_t b[8];
|
||||
uint64_t w;
|
||||
} nbd_handle_t;
|
||||
|
||||
/* The _raw types are the types as they appear on the wire. Non-_raw
|
||||
* types are in host-format.
|
||||
* Conversion functions are _r2h_ for converting raw to host, and _h2r_
|
||||
@@ -30,7 +44,7 @@ struct nbd_init_raw {
|
||||
struct nbd_request_raw {
|
||||
__be32 magic;
|
||||
__be32 type; /* == READ || == WRITE */
|
||||
char handle[8];
|
||||
nbd_handle_t handle;
|
||||
__be64 from;
|
||||
__be32 len;
|
||||
} __attribute__((packed));
|
||||
@@ -38,7 +52,7 @@ struct nbd_request_raw {
|
||||
struct nbd_reply_raw {
|
||||
__be32 magic;
|
||||
__be32 error; /* 0 = ok, else error */
|
||||
char handle[8]; /* handle you got from request */
|
||||
nbd_handle_t handle; /* handle you got from request */
|
||||
};
|
||||
|
||||
|
||||
@@ -52,8 +66,8 @@ struct nbd_init {
|
||||
|
||||
struct nbd_request {
|
||||
uint32_t magic;
|
||||
uint32_t type; /* == READ || == WRITE */
|
||||
char handle[8];
|
||||
uint32_t type; /* == READ || == WRITE || == DISCONNECT */
|
||||
nbd_handle_t handle;
|
||||
uint64_t from;
|
||||
uint32_t len;
|
||||
} __attribute__((packed));
|
||||
@@ -61,10 +75,9 @@ struct nbd_request {
|
||||
struct nbd_reply {
|
||||
uint32_t magic;
|
||||
uint32_t error; /* 0 = ok, else error */
|
||||
char handle[8]; /* handle you got from request */
|
||||
nbd_handle_t handle; /* handle you got from request */
|
||||
};
|
||||
|
||||
|
||||
void nbd_r2h_init( struct nbd_init_raw * from, struct nbd_init * to );
|
||||
void nbd_r2h_request( struct nbd_request_raw *from, struct nbd_request * to );
|
||||
void nbd_r2h_reply( struct nbd_reply_raw * from, struct nbd_reply * to );
|
@@ -8,6 +8,7 @@ int atoi(const char *nptr);
|
||||
((x) >= 'A' && (x) <= 'F' ) || \
|
||||
(x) == ':' || (x) == '.' \
|
||||
)
|
||||
|
||||
/* FIXME: should change this to return negative on error like everything else */
|
||||
int parse_ip_to_sockaddr(struct sockaddr* out, char* src)
|
||||
{
|
||||
@@ -47,6 +48,22 @@ int parse_ip_to_sockaddr(struct sockaddr* out, char* src)
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Fill ''out'' from the textual ''address''.
 *
 * An address starting with '/' is taken as the path of a UNIX domain socket
 * and stored as an AF_UNIX sockaddr_un; anything else is handed on to
 * parse_ip_to_sockaddr().
 *
 * Returns 1 when a UNIX path was stored, otherwise whatever
 * parse_ip_to_sockaddr() returns. A path too long for sun_path is rejected
 * with 0 rather than truncated.
 * NOTE(review): 0 is assumed to be the failure value, matching the FIXME on
 * parse_ip_to_sockaddr() - confirm against callers.
 */
int parse_to_sockaddr(struct sockaddr* out, char* address)
{
	struct sockaddr_un* un = (struct sockaddr_un*) out;

	NULLCHECK( address );

	if ( address[0] == '/' ) {
		/* Leave room for the terminating NUL: an unterminated or
		 * silently shortened path would name the wrong socket.
		 */
		if ( strlen( address ) >= sizeof( un->sun_path ) ) {
			return 0;
		}

		un->sun_family = AF_UNIX;
		/* The length check above guarantees strncpy() terminates (and
		 * zero-pads) the destination.
		 */
		strncpy( un->sun_path, address, sizeof( un->sun_path ) );
		return 1;
	}

	return parse_ip_to_sockaddr( out, address );
}
|
||||
|
||||
int parse_acl(struct ip_and_mask (**out)[], int max, char **entries)
|
||||
{
|
||||
struct ip_and_mask* list;
|
@@ -2,6 +2,8 @@
|
||||
#define PARSE_H
|
||||
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <unistd.h>
|
||||
|
||||
@@ -10,6 +12,7 @@ union mysockaddr {
|
||||
struct sockaddr generic;
|
||||
struct sockaddr_in v4;
|
||||
struct sockaddr_in6 v6;
|
||||
struct sockaddr_un un;
|
||||
};
|
||||
|
||||
struct ip_and_mask {
|
||||
@@ -18,6 +21,7 @@ struct ip_and_mask {
|
||||
};
|
||||
|
||||
int parse_ip_to_sockaddr(struct sockaddr* out, char* src);
|
||||
int parse_to_sockaddr(struct sockaddr* out, char* src);
|
||||
int parse_acl(struct ip_and_mask (**out)[], int max, char **entries);
|
||||
void parse_port( char *s_port, struct sockaddr_in *out );
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#include "nbdtypes.h"
|
||||
#include "ioutil.h"
|
||||
#include "sockutil.h"
|
||||
#include "util.h"
|
||||
#include "serve.h"
|
||||
|
||||
@@ -17,51 +18,94 @@ int socket_connect(struct sockaddr* to, struct sockaddr* from)
|
||||
|
||||
if (NULL != from) {
|
||||
if ( 0 > bind( fd, from, sizeof(struct sockaddr_in6 ) ) ){
|
||||
warn( "bind() failed");
|
||||
close( fd );
|
||||
warn( SHOW_ERRNO( "bind() to source address failed" ) );
|
||||
if ( 0 > close( fd ) ) { /* Non-fatal leak */
|
||||
warn( SHOW_ERRNO( "Failed to close fd %i", fd ) );
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if ( 0 > connect(fd, to, sizeof(struct sockaddr_in6)) ) {
|
||||
warn( "connect failed" );
|
||||
close( fd );
|
||||
if ( 0 > sock_try_connect( fd, to, sizeof( struct sockaddr_in6 ), 15 ) ) {
|
||||
warn( SHOW_ERRNO( "connect failed" ) );
|
||||
if ( 0 > close( fd ) ) { /* Non-fatal leak */
|
||||
warn( SHOW_ERRNO( "Failed to close fd %i", fd ) );
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ( sock_set_tcp_nodelay( fd, 1 ) == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to set TCP_NODELAY" ) );
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
int socket_nbd_read_hello(int fd, off64_t * out_size)
|
||||
int nbd_check_hello( struct nbd_init_raw* init_raw, uint64_t* out_size )
|
||||
{
|
||||
struct nbd_init init;
|
||||
if ( 0 > readloop(fd, &init, sizeof(init)) ) {
|
||||
warn( "Couldn't read init" );
|
||||
goto fail;
|
||||
}
|
||||
if (strncmp(init.passwd, INIT_PASSWD, 8) != 0) {
|
||||
if ( strncmp( init_raw->passwd, INIT_PASSWD, 8 ) != 0 ) {
|
||||
warn( "wrong passwd" );
|
||||
goto fail;
|
||||
}
|
||||
if (be64toh(init.magic) != INIT_MAGIC) {
|
||||
warn("wrong magic (%x)", be64toh(init.magic));
|
||||
if ( be64toh( init_raw->magic ) != INIT_MAGIC ) {
|
||||
warn( "wrong magic (%x)", be64toh( init_raw->magic ) );
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if ( NULL != out_size ) {
|
||||
*out_size = be64toh(init.size);
|
||||
*out_size = be64toh( init_raw->size );
|
||||
}
|
||||
|
||||
return 1;
|
||||
fail:
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
void fill_request(struct nbd_request *request, int type, off64_t from, int len)
|
||||
int socket_nbd_read_hello( int fd, uint64_t* out_size )
|
||||
{
|
||||
struct nbd_init_raw init_raw;
|
||||
|
||||
|
||||
if ( 0 > readloop( fd, &init_raw, sizeof(init_raw) ) ) {
|
||||
warn( "Couldn't read init" );
|
||||
return 0;
|
||||
}
|
||||
|
||||
return nbd_check_hello( &init_raw, out_size );
|
||||
}
|
||||
|
||||
void nbd_hello_to_buf( struct nbd_init_raw *buf, off64_t out_size )
|
||||
{
|
||||
struct nbd_init init;
|
||||
|
||||
memcpy( &init.passwd, INIT_PASSWD, 8 );
|
||||
init.magic = INIT_MAGIC;
|
||||
init.size = out_size;
|
||||
|
||||
memset( buf, 0, sizeof( struct nbd_init_raw ) ); // ensure reserved is 0s
|
||||
nbd_h2r_init( &init, buf );
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
int socket_nbd_write_hello(int fd, off64_t out_size)
|
||||
{
|
||||
struct nbd_init_raw init_raw;
|
||||
nbd_hello_to_buf( &init_raw, out_size );
|
||||
|
||||
if ( 0 > writeloop( fd, &init_raw, sizeof( init_raw ) ) ) {
|
||||
warn( SHOW_ERRNO( "failed to write hello to socket" ) );
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
void fill_request(struct nbd_request *request, int type, uint64_t from, uint32_t len)
|
||||
{
|
||||
request->magic = htobe32(REQUEST_MAGIC);
|
||||
request->type = htobe32(type);
|
||||
((int*) request->handle)[0] = rand();
|
||||
((int*) request->handle)[1] = rand();
|
||||
request->handle.w = (((uint64_t)rand()) << 32) | ((uint64_t)rand());
|
||||
request->from = htobe64(from);
|
||||
request->len = htobe32(len);
|
||||
}
|
||||
@@ -81,7 +125,7 @@ void read_reply(int fd, struct nbd_request *request, struct nbd_reply *reply)
|
||||
if (reply->error != 0) {
|
||||
error("Server replied with error %d", reply->error);
|
||||
}
|
||||
if (strncmp(request->handle, reply->handle, 8) != 0) {
|
||||
if (request->handle.w != reply->handle.w) {
|
||||
error("Did not reply with correct handle");
|
||||
}
|
||||
}
|
||||
@@ -94,16 +138,17 @@ void wait_for_data( int fd, int timeout_secs )
|
||||
|
||||
FD_ZERO( &fds );
|
||||
FD_SET( fd, &fds );
|
||||
selected = select( FD_SETSIZE,
|
||||
&fds, NULL, NULL,
|
||||
timeout_secs >=0 ? &tv : NULL );
|
||||
|
||||
selected = sock_try_select(
|
||||
FD_SETSIZE, &fds, NULL, NULL, timeout_secs >=0 ? &tv : NULL
|
||||
);
|
||||
|
||||
FATAL_IF( -1 == selected, "Select failed" );
|
||||
ERROR_IF( 0 == selected, "Timed out waiting for reply" );
|
||||
}
|
||||
|
||||
|
||||
void socket_nbd_read(int fd, off64_t from, int len, int out_fd, void* out_buf, int timeout_secs)
|
||||
void socket_nbd_read(int fd, uint64_t from, uint32_t len, int out_fd, void* out_buf, int timeout_secs)
|
||||
{
|
||||
struct nbd_request request;
|
||||
struct nbd_reply reply;
|
||||
@@ -127,7 +172,7 @@ void socket_nbd_read(int fd, off64_t from, int len, int out_fd, void* out_buf, i
|
||||
}
|
||||
}
|
||||
|
||||
void socket_nbd_write(int fd, off64_t from, int len, int in_fd, void* in_buf, int timeout_secs)
|
||||
void socket_nbd_write(int fd, uint64_t from, uint32_t len, int in_fd, void* in_buf, int timeout_secs)
|
||||
{
|
||||
struct nbd_request request;
|
||||
struct nbd_reply reply;
|
||||
@@ -152,18 +197,6 @@ void socket_nbd_write(int fd, off64_t from, int len, int in_fd, void* in_buf, in
|
||||
}
|
||||
|
||||
|
||||
void socket_nbd_entrust( int fd )
|
||||
{
|
||||
struct nbd_request request;
|
||||
struct nbd_reply reply;
|
||||
|
||||
fill_request( &request, REQUEST_ENTRUST, 0, 0 );
|
||||
FATAL_IF_NEGATIVE( writeloop( fd, &request, sizeof( request ) ),
|
||||
"Couldn't write request");
|
||||
read_reply( fd, &request, &reply );
|
||||
}
|
||||
|
||||
|
||||
int socket_nbd_disconnect( int fd )
|
||||
{
|
||||
int success = 1;
|
||||
@@ -179,10 +212,12 @@ int socket_nbd_disconnect( int fd )
|
||||
}
|
||||
|
||||
#define CHECK_RANGE(error_type) { \
|
||||
off64_t size;\
|
||||
uint64_t size;\
|
||||
int success = socket_nbd_read_hello(params->client, &size); \
|
||||
if ( success ) {\
|
||||
if (params->from < 0 || (params->from + params->len) > size) {\
|
||||
uint64_t endpoint = params->from + params->len; \
|
||||
if (endpoint > size || \
|
||||
endpoint < params->from ) { /* this happens on overflow */ \
|
||||
fatal(error_type \
|
||||
" request %d+%d is out of range given size %d", \
|
||||
params->from, params->len, size\
|
23
src/common/readwrite.h
Normal file
23
src/common/readwrite.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#ifndef READWRITE_H
|
||||
|
||||
#define READWRITE_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include "nbdtypes.h"
|
||||
|
||||
int socket_connect(struct sockaddr* to, struct sockaddr* from);
|
||||
int socket_nbd_read_hello(int fd, uint64_t* size);
|
||||
int socket_nbd_write_hello(int fd, uint64_t size);
|
||||
void socket_nbd_read(int fd, uint64_t from, uint32_t len, int out_fd, void* out_buf, int timeout_secs);
|
||||
void socket_nbd_write(int fd, uint64_t from, uint32_t len, int out_fd, void* out_buf, int timeout_secs);
|
||||
int socket_nbd_disconnect( int fd );
|
||||
|
||||
/* as you can see, we're slowly accumulating code that should really be in an
|
||||
* NBD library */
|
||||
|
||||
void nbd_hello_to_buf( struct nbd_init_raw* buf, uint64_t out_size );
|
||||
int nbd_check_hello( struct nbd_init_raw* init_raw, uint64_t* out_size );
|
||||
|
||||
#endif
|
||||
|
@@ -15,12 +15,13 @@ void print_response( const char * response )
|
||||
NULLCHECK( response );
|
||||
|
||||
exit_status = atoi(response);
|
||||
response_text = strchr( response, ':' ) + 2;
|
||||
response_text = strchr( response, ':' );
|
||||
|
||||
NULLCHECK( response_text );
|
||||
FATAL_IF_NULL( response_text,
|
||||
"Error parsing server response: '%s'", response );
|
||||
|
||||
out = exit_status > 0 ? stderr : stdout;
|
||||
fprintf(out, "%s\n", response_text );
|
||||
fprintf(out, "%s\n", response_text + 2);
|
||||
}
|
||||
|
||||
void do_remote_command(char* command, char* socket_name, int argc, char** argv)
|
||||
@@ -62,7 +63,5 @@ void do_remote_command(char* command, char* socket_name, int argc, char** argv)
|
||||
print_response( response );
|
||||
|
||||
exit(atoi(response));
|
||||
|
||||
close(remote);
|
||||
}
|
||||
|
@@ -51,7 +51,6 @@ struct self_pipe * self_pipe_create(void)
|
||||
{
|
||||
struct self_pipe *sig = xmalloc( sizeof( struct self_pipe ) );
|
||||
int fds[2];
|
||||
int fcntl_err;
|
||||
|
||||
if ( NULL == sig ) { return NULL; }
|
||||
|
||||
@@ -62,7 +61,7 @@ struct self_pipe * self_pipe_create(void)
|
||||
}
|
||||
|
||||
if ( fcntl( fds[0], F_SETFL, O_NONBLOCK ) || fcntl( fds[1], F_SETFL, O_NONBLOCK ) ) {
|
||||
fcntl_err = errno;
|
||||
int fcntl_err = errno;
|
||||
while( close( fds[0] ) == -1 && errno == EINTR );
|
||||
while( close( fds[1] ) == -1 && errno == EINTR );
|
||||
free( sig );
|
256
src/common/sockutil.c
Normal file
256
src/common/sockutil.c
Normal file
@@ -0,0 +1,256 @@
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <sys/un.h>
|
||||
|
||||
#include "sockutil.h"
|
||||
#include "util.h"
|
||||
|
||||
/* Returns the number of bytes of ''sa'' that are meaningful for its address
 * family, suitable as the length argument to bind()/connect():
 *   AF_INET  - sizeof( struct sockaddr_in )
 *   AF_INET6 - sizeof( struct sockaddr_in6 )
 *   AF_UNIX  - the bytes preceding sun_path plus the length of the path
 * Any other family yields 0.
 */
size_t sockaddr_size( const struct sockaddr* sa )
{
	const struct sockaddr_un* un = (const struct sockaddr_un*) sa;
	size_t ret = 0;

	switch( sa->sa_family ) {
		case AF_INET:
			ret = sizeof( struct sockaddr_in );
			break;
		case AF_INET6:
			ret = sizeof( struct sockaddr_in6 );
			break;
		case AF_UNIX:
			/* This is the value SUN_LEN() computes. The header
			 * bytes must be counted exactly once: adding
			 * sizeof( sun_family ) on top of SUN_LEN() overstates
			 * the length and can push a long path past the end of
			 * the structure.
			 */
			ret = (size_t)( (const char*) un->sun_path - (const char*) un )
				+ strlen( un->sun_path );
			break;
		default:
			break;
	}

	return ret;
}
|
||||
|
||||
/* Write a printable form of ''sa'' into ''dest'' (capacity ''len'' bytes).
 *
 *   AF_INET / AF_INET6 - the inet_ntop() text, followed by " port N" when
 *                        the port is non-zero
 *   AF_UNIX            - the socket path
 *   anything else, or an inet_ntop() failure - "???"
 *
 * Nothing is ever written beyond ''len'' bytes; long text is truncated.
 * Returns ''dest'' on success for AF_UNIX, the inet_ntop() result for the
 * IP families, and NULL when the address could not be converted.
 */
const char* sockaddr_address_string( const struct sockaddr* sa, char* dest, size_t len )
{
	NULLCHECK( sa );
	NULLCHECK( dest );

	const struct sockaddr_in* in = ( const struct sockaddr_in* ) sa;
	const struct sockaddr_in6* in6 = ( const struct sockaddr_in6* ) sa;
	const struct sockaddr_un* un = ( const struct sockaddr_un* ) sa;

	unsigned short real_port = 0;
	const char* ret = NULL;

	memset( dest, 0, len );

	if ( sa->sa_family == AF_INET ) {
		ret = inet_ntop( AF_INET, &in->sin_addr, dest, len );
		real_port = ntohs( in->sin_port );
	} else if ( sa->sa_family == AF_INET6 ) {
		ret = inet_ntop( AF_INET6, &in6->sin6_addr, dest, len );
		real_port = ntohs( in6->sin6_port );
	} else if ( sa->sa_family == AF_UNIX ) {
		/* Bound the copy by the destination, not the source, and cap
		 * the read at sizeof( sun_path ) in case the path fills the
		 * whole field without a terminator.
		 */
		snprintf( dest, len, "%.*s",
			  (int) sizeof( un->sun_path ), un->sun_path );
		ret = dest;
	}

	if ( ret == NULL ) {
		snprintf( dest, len, "%s", "???" );
	}

	if ( NULL != ret && real_port > 0 && sa->sa_family != AF_UNIX ) {
		size_t size = strlen( dest );
		snprintf( dest + size, len - size, " port %d", real_port );
	}

	return ret;
}
|
||||
|
||||
/* Set the SO_REUSEADDR socket option on ''fd'' to ''optval''.
 * Returns the setsockopt() result: 0 on success, -1 on failure.
 */
int sock_set_reuseaddr( int fd, int optval )
{
	const int value = optval;
	const socklen_t value_len = sizeof( value );

	return setsockopt( fd, SOL_SOCKET, SO_REUSEADDR, &value, value_len );
}
|
||||
|
||||
/* Set the TCP_NODELAY option on ''fd'' to ''optval''.
 * Returns the setsockopt() result: 0 on success, -1 on failure.
 */
int sock_set_tcp_nodelay( int fd, int optval )
{
	const int value = optval;
	const socklen_t value_len = sizeof( value );

	return setsockopt( fd, IPPROTO_TCP, TCP_NODELAY, &value, value_len );
}
|
||||
|
||||
/* Set the TCP_CORK option on ''fd'' to ''optval''.
 * Returns the setsockopt() result: 0 on success, -1 on failure.
 */
int sock_set_tcp_cork( int fd, int optval )
{
	const int value = optval;
	const socklen_t value_len = sizeof( value );

	return setsockopt( fd, IPPROTO_TCP, TCP_CORK, &value, value_len );
}
|
||||
|
||||
/* Switch O_NONBLOCK on ''fd'' on (''optval'' non-zero) or off (''optval''
 * zero), leaving the other file status flags as they were.
 * Returns -1 if the current flags cannot be read, otherwise the result of
 * the fcntl( F_SETFL ) call.
 */
int sock_set_nonblock( int fd, int optval )
{
	const int current = fcntl( fd, F_GETFL );
	int wanted;

	if ( current == -1 ) {
		return -1;
	}

	wanted = optval ? ( current | O_NONBLOCK ) : ( current & (~O_NONBLOCK) );

	return fcntl( fd, F_SETFL, wanted );
}
|
||||
|
||||
/* bind() ''fd'' to ''sa'', retrying while the address is not yet available.
 *
 * EADDRNOTAVAIL is retried up to 10 times in total, sleeping one second
 * between attempts. Every other error - including EADDRINUSE - gives up
 * immediately.
 *
 * Returns the result of the last bind() call: 0 on success, -1 on failure.
 */
int sock_try_bind( int fd, const struct sockaddr* sa )
{
	int bind_result;
	char s_address[256];
	int retry = 10;

	sockaddr_address_string( sa, &s_address[0], 256 );

	do {
		int bind_errno;

		bind_result = bind( fd, sa, sockaddr_size( sa ) );
		if ( 0 == bind_result ) {
			info( "Bound to %s", s_address );
			break;
		}

		/* Capture errno before logging: the logging call may itself
		 * change it, and the decision below must be based on why
		 * bind() failed.
		 */
		bind_errno = errno;
		warn( SHOW_ERRNO( "Couldn't bind to %s", s_address ) );

		switch ( bind_errno ) {
			/* bind() can give us EACCES, EADDRINUSE, EADDRNOTAVAIL, EBADF,
			 * EINVAL, ENOTSOCK, EFAULT, ELOOP, ENAMETOOLONG, ENOENT,
			 * ENOMEM, ENOTDIR, EROFS
			 *
			 * Any of these other than EADDRINUSE & EADDRNOTAVAIL signify
			 * that there's a logic error somewhere.
			 *
			 * EADDRINUSE is fatal: if there's something already where we
			 * want to be listening, we have no guarantees that any clients
			 * will cope with it.
			 */
			case EADDRNOTAVAIL:
				retry--;
				if (retry) {
					debug( "retrying" );
					sleep( 1 );
				}
				break;
			case EADDRINUSE:
				warn( "%s in use, giving up.", s_address );
				retry = 0;
				break;
			default:
				warn( "giving up" );
				retry = 0;
				break;
		}
	} while ( retry );

	return bind_result;
}
|
||||
|
||||
/* Call select(), calling it again for as long as it fails with EINTR.
 * The same set pointers and timeout pointer are passed on every attempt.
 * Returns the first select() result that is not an EINTR failure.
 */
int sock_try_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout)
{
	for ( ;; ) {
		const int ready = select(nfds, readfds, writefds, exceptfds, timeout);

		/* Only a -1 result accompanied by EINTR is worth another go */
		if ( ready != -1 || errno != EINTR ) {
			return ready;
		}
	}
}
|
||||
|
||||
/* Connect fd to the address `to`, giving up after `wait` seconds.
 *
 * The socket is switched to non-blocking mode for the attempt and back to
 * blocking mode before returning.  If it cannot be made non-blocking, a
 * plain connect() is attempted instead.
 *
 * Returns 0 on success and -1 on failure.  On failure errno describes the
 * problem: ETIMEDOUT if the connection didn't complete in time, otherwise
 * the error from connect(), select(), getsockopt() or the socket's
 * SO_ERROR value.  If the socket cannot be made blocking again, -1 is
 * returned with errno left as set by that failure.
 */
int sock_try_connect( int fd, struct sockaddr* to, socklen_t addrlen, int wait )
{
	fd_set fds;
	struct timeval tv = { wait, 0 };
	int result = 0;
	int saved_errno = 0;
	int so_error = 0;
	socklen_t so_error_len = sizeof( so_error );

	if ( sock_set_nonblock( fd, 1 ) == -1 ) {
		warn( SHOW_ERRNO( "Failed to set socket non-blocking for connect()" ) );
		return connect( fd, to, addrlen );
	}

	/* FD_SET() on a descriptor outside [0, FD_SETSIZE) is undefined, so
	 * refuse rather than scribble over the stack.
	 */
	if ( fd >= FD_SETSIZE ) {
		result = -1;
		saved_errno = EINVAL;
		goto out;
	}

	FD_ZERO( &fds );
	FD_SET( fd, &fds );

	do {
		result = connect( fd, to, addrlen );

		if ( result == -1 ) {
			switch( errno ) {
				case EINPROGRESS:
					result = 0;
					break; /* success */
				case EAGAIN:
				case EINTR:
					/* Try connect() again. This only breaks out of the switch,
					 * not the do...while loop. since result == -1, we go again.
					 */
					break;
				default:
					saved_errno = errno;
					warn( SHOW_ERRNO( "Failed to connect()" ) );
					goto out;
			}
		}
	} while ( result == -1 );

	/* Wait for the socket to become writable, i.e. for the connection
	 * attempt to finish one way or the other.
	 */
	if ( -1 == sock_try_select( fd + 1, NULL, &fds, NULL, &tv) ) {
		saved_errno = errno;
		warn( SHOW_ERRNO( "failed to select() on non-blocking connect" ) );
		result = -1;
		goto out;
	}

	if ( !FD_ISSET( fd, &fds ) ) {
		result = -1;
		saved_errno = ETIMEDOUT;
		goto out;
	}

	if ( getsockopt( fd, SOL_SOCKET, SO_ERROR, &so_error, &so_error_len ) == -1 ) {
		saved_errno = errno;
		result = -1;
		warn( SHOW_ERRNO( "getsockopt() failed" ) );
		goto out;
	}

	if ( so_error == EINPROGRESS ) {
		so_error = ETIMEDOUT;
	}

	result = so_error ? -1 : 0;
	saved_errno = so_error;

out:
	if ( sock_set_nonblock( fd, 0 ) == -1 ) {
		warn( SHOW_ERRNO( "Failed to make socket blocking after connect()" ) );
		return -1;
	}

	debug( "sock_try_connect: %i", result );

	/* Set errno last so the cleanup and logging above cannot disturb the
	 * value the caller sees.
	 */
	errno = saved_errno;
	return result;
}
|
||||
|
||||
/* Close fd with a single close() call and return its result.
 *
 * Failures other than EINTR are logged.  An EINTR failure is returned
 * without logging and without a second close() attempt.
 * NOTE(review): the header describes this as "retrying EINTR"; the
 * previous do { ... continue; } while( 0 ) form never actually looped,
 * so no retry has ever happened.  Confirm that is the intended behaviour.
 */
int sock_try_close( int fd )
{
	const int result = close( fd );

	if ( result == -1 && EINTR != errno ) {
		warn( SHOW_ERRNO( "Failed to close() fd %i", fd ) );
	}

	return result;
}
|
||||
|
41
src/common/sockutil.h
Normal file
41
src/common/sockutil.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#ifndef SOCKUTIL_H
|
||||
|
||||
#define SOCKUTIL_H
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/select.h>
|
||||
|
||||
/* Returns the size of the sockaddr, or 0 on error */
|
||||
size_t sockaddr_size(const struct sockaddr* sa);
|
||||
|
||||
/* Convert a sockaddr into an address. Like inet_ntop, it returns dest if
|
||||
* successful, NULL otherwise. In the latter case, dest will contain "???"
|
||||
*/
|
||||
const char* sockaddr_address_string(const struct sockaddr* sa, char* dest, size_t len);
|
||||
|
||||
/* Set the SO_REUSEADDR option */
|
||||
int sock_set_reuseaddr(int fd, int optval);
|
||||
|
||||
/* Set the tcp_nodelay option */
|
||||
int sock_set_tcp_nodelay(int fd, int optval);
|
||||
|
||||
/* Set the tcp_cork option */
|
||||
int sock_set_tcp_cork(int fd, int optval);
|
||||
|
||||
int sock_set_nonblock(int fd, int optval);
|
||||
|
||||
/* Attempt to bind the fd to the sockaddr, retrying common transient failures */
|
||||
int sock_try_bind(int fd, const struct sockaddr* sa);
|
||||
|
||||
/* Try to call select(), retrying EINTR */
|
||||
int sock_try_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout);
|
||||
|
||||
/* Try to call connect(), timing out after wait seconds */
|
||||
int sock_try_connect( int fd, struct sockaddr* to, socklen_t addrlen, int wait );
|
||||
|
||||
/* Try to call close(), retrying EINTR */
|
||||
int sock_try_close( int fd );
|
||||
|
||||
#endif
|
||||
|
@@ -6,6 +6,7 @@
|
||||
#include <errno.h>
|
||||
#include <malloc.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
@@ -50,6 +51,25 @@ void mylog(int line_level, const char* format, ...)
|
||||
va_end(argptr);
|
||||
}
|
||||
|
||||
uint64_t monotonic_time_ms()
|
||||
{
|
||||
struct timespec ts;
|
||||
uint64_t seconds_ms, nanoseconds_ms;
|
||||
|
||||
FATAL_IF_NEGATIVE(
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts),
|
||||
SHOW_ERRNO( "clock_gettime failed" )
|
||||
);
|
||||
|
||||
seconds_ms = ts.tv_sec;
|
||||
seconds_ms = seconds_ms * 1000;
|
||||
|
||||
nanoseconds_ms = ts.tv_nsec;
|
||||
nanoseconds_ms = nanoseconds_ms / 1000000;
|
||||
|
||||
return seconds_ms + nanoseconds_ms;
|
||||
}
|
||||
|
||||
|
||||
void* xrealloc(void* ptr, size_t size)
|
||||
{
|
@@ -8,6 +8,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
void* xrealloc(void* ptr, size_t size);
|
||||
void* xmalloc(size_t size);
|
||||
@@ -85,9 +86,13 @@ void error_handler(int fatal);
|
||||
/* mylog a line at the given level (0 being most verbose) */
|
||||
void mylog(int line_level, const char* format, ...);
|
||||
|
||||
/* Returns the current time, in milliseconds, from CLOCK_MONOTONIC */
|
||||
uint64_t monotonic_time_ms(void);
|
||||
|
||||
|
||||
#define levstr(i) (i==0?'D':(i==1?'I':(i==2?'W':(i==3?'E':'F'))))
|
||||
|
||||
#define myloglev(level, msg, ...) mylog( level, "%c:%d %p %s:%d: "msg"\n", levstr(level), getpid(),pthread_self(), __FILE__, __LINE__, ##__VA_ARGS__ )
|
||||
#define myloglev(level, msg, ...) mylog( level, "%"PRIu64":%c:%d %p %s:%d: "msg"\n", monotonic_time_ms(), levstr(level), getpid(),pthread_self(), __FILE__, __LINE__, ##__VA_ARGS__ )
|
||||
|
||||
#ifdef DEBUG
|
||||
# define debug(msg, ...) myloglev(0, msg, ##__VA_ARGS__)
|
||||
@@ -111,6 +116,7 @@ void mylog(int line_level, const char* format, ...);
|
||||
#define fatal(msg, ...) do { \
|
||||
myloglev(4, msg, ##__VA_ARGS__); \
|
||||
error_handler(1); \
|
||||
exit(1); /* never-reached, this is to make static code analizer happy */ \
|
||||
} while(0)
|
||||
|
||||
|
||||
@@ -148,6 +154,9 @@ void mylog(int line_level, const char* format, ...);
|
||||
|
||||
#define NULLCHECK(value) FATAL_IF_NULL(value, "BUG: " #value " is null")
|
||||
|
||||
#define SHOW_ERRNO( msg, ... ) msg ": %s (%i)", ##__VA_ARGS__, ( errno == 0 ? "EOF" : strerror(errno) ), errno
|
||||
|
||||
/* Log msg at warning level when value is negative.  Wrapped in
 * do { } while (0) so the macro behaves as a single statement (safe in an
 * unbraced if/else); value is parenthesised so any expression may be passed.
 */
#define WARN_IF_NEGATIVE( value, msg, ... ) do { if ( (value) < 0 ) { warn( msg, ##__VA_ARGS__ ); } } while (0)
|
||||
|
||||
#endif
|
||||
|
282
src/ioutil.c
282
src/ioutil.c
@@ -1,282 +0,0 @@
|
||||
#include <sys/mman.h>
|
||||
#include <sys/sendfile.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/types.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/fiemap.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "bitset.h"
|
||||
|
||||
struct bitset_mapping* build_allocation_map(int fd, uint64_t size, int resolution)
|
||||
{
|
||||
unsigned int i;
|
||||
struct bitset_mapping* allocation_map = bitset_alloc(size, resolution);
|
||||
struct fiemap *fiemap_count = NULL, *fiemap = NULL;
|
||||
|
||||
fiemap_count = (struct fiemap*) xmalloc(sizeof(struct fiemap));
|
||||
|
||||
fiemap_count->fm_start = 0;
|
||||
fiemap_count->fm_length = size;
|
||||
fiemap_count->fm_flags = 0;
|
||||
fiemap_count->fm_extent_count = 0;
|
||||
fiemap_count->fm_mapped_extents = 0;
|
||||
|
||||
/* Find out how many extents there are */
|
||||
if (ioctl(fd, FS_IOC_FIEMAP, fiemap_count) < 0) {
|
||||
debug( "Couldn't get fiemap_count, returning no allocation_map" );
|
||||
goto no_map;
|
||||
}
|
||||
|
||||
/* Resize fiemap to allow us to read in the extents */
|
||||
fiemap = (struct fiemap*)xmalloc(
|
||||
sizeof(struct fiemap) + (
|
||||
sizeof(struct fiemap_extent) *
|
||||
fiemap_count->fm_mapped_extents
|
||||
)
|
||||
);
|
||||
|
||||
/* realloc makes valgrind complain a lot */
|
||||
memcpy(fiemap, fiemap_count, sizeof(struct fiemap));
|
||||
free( fiemap_count );
|
||||
|
||||
fiemap->fm_extent_count = fiemap->fm_mapped_extents;
|
||||
fiemap->fm_mapped_extents = 0;
|
||||
|
||||
if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) {
|
||||
debug( "Couldn't get fiemap, returning no allocation_map" );
|
||||
goto no_map;
|
||||
}
|
||||
|
||||
for (i=0;i<fiemap->fm_mapped_extents;i++) {
|
||||
bitset_set_range(
|
||||
allocation_map,
|
||||
fiemap->fm_extents[i].fe_logical,
|
||||
fiemap->fm_extents[i].fe_length
|
||||
);
|
||||
}
|
||||
|
||||
/* This is pointlessly verbose for real discs, it's here as a
|
||||
* reference for pulling data out of the allocation map */
|
||||
if ( 0 ) {
|
||||
for (i=0; i<(size/resolution); i++) {
|
||||
debug("map[%d] = %d%d%d%d%d%d%d%d",
|
||||
i,
|
||||
(allocation_map->bits[i] & 1) == 1,
|
||||
(allocation_map->bits[i] & 2) == 2,
|
||||
(allocation_map->bits[i] & 4) == 4,
|
||||
(allocation_map->bits[i] & 8) == 8,
|
||||
(allocation_map->bits[i] & 16) == 16,
|
||||
(allocation_map->bits[i] & 32) == 32,
|
||||
(allocation_map->bits[i] & 64) == 64,
|
||||
(allocation_map->bits[i] & 128) == 128
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
free(fiemap);
|
||||
|
||||
debug("Successfully built allocation map");
|
||||
return allocation_map;
|
||||
|
||||
no_map:
|
||||
free( allocation_map );
|
||||
if ( NULL != fiemap ) { free( fiemap ); }
|
||||
if ( NULL != fiemap_count ) { free( fiemap_count ); }
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/* Open filename read-write (O_SYNC, plus O_DIRECT when built with
 * DIRECT_IO), measure it, and optionally map it into memory.
 *
 *   out_fd   - receives the open file descriptor (must not be NULL)
 *   out_size - if non-NULL, receives the file size in bytes
 *   out_map  - if non-NULL, receives a shared read/write mapping of the
 *              whole file
 *
 * Returns 0 on success and a negative value on failure.  If a step after
 * open() fails, the descriptor is closed again and *out_fd is set to -1
 * so it is not leaked.
 */
int open_and_mmap(const char* filename, int* out_fd, off64_t *out_size, void **out_map)
{
	off64_t size;

	/* O_DIRECT seems to be intermittently supported.  Leaving it as
	 * a compile-time option for now. */
#ifdef DIRECT_IO
	*out_fd = open(filename, O_RDWR | O_DIRECT | O_SYNC );
#else
	*out_fd = open(filename, O_RDWR | O_SYNC );
#endif

	/* open() signals failure with -1; descriptor 0 is perfectly valid */
	if (*out_fd < 0) {
		warn("open(%s) failed: does it exist?", filename);
		return *out_fd;
	}

	size = lseek64(*out_fd, 0, SEEK_END);
	if (size < 0) {
		warn("lseek64() failed");
		goto fail;
	}
	if (out_size) {
		*out_size = size;
	}

	if (out_map) {
		*out_map = mmap64(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED,
			*out_fd, 0);
		if (*out_map == MAP_FAILED) {
			warn("mmap64() failed");
			goto fail;
		}
	}

	/* out_map is optional, so don't dereference it blindly for logging */
	debug("opened %s size %lld on fd %d @ %p", filename, (long long) size,
		*out_fd, out_map ? *out_map : NULL);

	return 0;

fail:
	close(*out_fd);
	*out_fd = -1;
	return -1;
}
|
||||
|
||||
|
||||
/* Write exactly size bytes from buffer to filedes, issuing as many
 * write() calls as it takes.
 * Returns 0 once everything is written, or -1 as soon as a write() fails.
 */
int writeloop(int filedes, const void *buffer, size_t size)
{
	/* Arithmetic on void* is a GNU extension; step through as bytes */
	const char *bytes = buffer;
	size_t written = 0;

	while (written < size) {
		ssize_t result = write(filedes, bytes + written, size - written);
		if (result == -1) { return -1; }
		written += (size_t) result;
	}
	return 0;
}
|
||||
|
||||
/* Read exactly size bytes from filedes into buffer, issuing as many
 * read() calls as it takes.
 * Returns 0 once the buffer is full, or -1 if a read() fails or hits
 * end-of-file first.
 */
int readloop(int filedes, void *buffer, size_t size)
{
	/* Arithmetic on void* is a GNU extension; step through as bytes */
	char *bytes = buffer;
	size_t readden = 0;

	while (readden < size) {
		ssize_t result = read(filedes, bytes + readden, size - readden);
		if (result == 0 /* EOF */ || result == -1 /* error */) {
			return -1;
		}
		readden += (size_t) result;
	}
	return 0;
}
|
||||
|
||||
/* Send count bytes from in_fd to out_fd with sendfile64(), repeating the
 * call until everything has gone.  offset is passed straight through to
 * sendfile64() on every call.
 *
 * Returns 0 when count bytes have been sent.  Returns -1 if sendfile64()
 * fails, or if it reports 0 bytes (no more input) before count is
 * reached - without that check the loop would never terminate.
 */
int sendfileloop(int out_fd, int in_fd, off64_t *offset, size_t count)
{
	size_t sent = 0;

	while (sent < count) {
		ssize_t result = sendfile64(out_fd, in_fd, offset, count-sent);
		debug("sendfile64(out_fd=%d, in_fd=%d, offset=%p, count-sent=%zu) = %zd", out_fd, in_fd, (void*) offset, count-sent, result);

		if (result == -1) { return -1; }
		if (result == 0) {
			/* Input ran dry before count bytes were sent */
			return -1;
		}
		sent += (size_t) result;
		debug("sent=%zu, count=%zu", sent, count);
	}
	debug("exiting sendfileloop");
	return 0;
}
|
||||
|
||||
#include <errno.h>
|
||||
/* Move len bytes from fd_in to fd_out with splice(), repeating the call
 * until all of them have been moved.  SPLICE_F_MORE and SPLICE_F_MOVE are
 * always set; flags2 is OR-ed in on top.
 *
 * Returns the number of bytes spliced.  That equals len unless
 * SPLICE_F_NONBLOCK was requested and splice() reported EAGAIN, in which
 * case the (possibly zero) amount moved so far is returned.
 * Returns -1 if splice() fails for any other reason, or reports
 * end-of-input (0) before len bytes were moved.
 */
ssize_t spliceloop(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags2)
{
	const unsigned int flags = SPLICE_F_MORE|SPLICE_F_MOVE|flags2;
	size_t spliced = 0;

	while (spliced < len) {
		/* Only ask for what is still outstanding, otherwise a short
		 * first transfer lets later calls move more than len in total.
		 */
		ssize_t result = splice(fd_in, off_in, fd_out, off_out, len - spliced, flags);

		if (result < 0) {
			if (errno == EAGAIN && (flags & SPLICE_F_NONBLOCK) ) {
				return spliced;
			}
			return -1;
		}

		if (result == 0) {
			/* End of input: nothing more will arrive, so looping again
			 * would spin forever.
			 */
			return -1;
		}

		spliced += (size_t) result;
	}

	return spliced;
}
|
||||
|
||||
int splice_via_pipe_loop(int fd_in, int fd_out, size_t len)
|
||||
{
|
||||
|
||||
int pipefd[2]; /* read end, write end */
|
||||
size_t spliced=0;
|
||||
|
||||
if (pipe(pipefd) == -1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
while (spliced < len) {
|
||||
ssize_t run = len-spliced;
|
||||
ssize_t s2, s1 = spliceloop(fd_in, NULL, pipefd[1], NULL, run, SPLICE_F_NONBLOCK);
|
||||
/*if (run > 65535)
|
||||
run = 65535;*/
|
||||
if (s1 < 0) { break; }
|
||||
|
||||
s2 = spliceloop(pipefd[0], NULL, fd_out, NULL, s1, 0);
|
||||
if (s2 < 0) { break; }
|
||||
spliced += s2;
|
||||
}
|
||||
close(pipefd[0]);
|
||||
close(pipefd[1]);
|
||||
|
||||
return spliced < len ? -1 : 0;
|
||||
}
|
||||
|
||||
/* Read fd one byte at a time into buf until a newline turns up.
 *
 * The newline is replaced by a terminating NUL and the return value is
 * the line length plus one (for that NUL).
 * If a read() fails or reaches EOF before a newline is seen, -1 is
 * returned and the partial line is lost.
 * If bufsize bytes are read without meeting a newline, reading stops and
 * bufsize+1 is returned; no NUL is written by this function in that case.
 */
int read_until_newline(int fd, char* buf, int bufsize)
{
	int pos = 0;

	while (pos < bufsize) {
		int got = read(fd, buf + pos, 1);

		if (got <= 0) { return -1; }

		if (buf[pos] == '\n') {
			buf[pos] = '\0';
			break;
		}
		pos++;
	}

	return pos + 1;
}
|
||||
|
||||
/* Collect lines from fd until an empty line, EOF or a read error.
 *
 * Each collected line is strdup()ed into the array stored in *lines,
 * which is grown with xrealloc() (it starts out as NULL).
 * Returns the number of lines counted.
 */
int read_lines_until_blankline(int fd, int max_line_length, char ***lines)
{
	char line[max_line_length+1];
	int count = 0;

	*lines = NULL;
	memset(line, 0, max_line_length+1);

	for (;;) {
		/* read_until_newline gives 1 for an empty line and -1 for EOF
		 * or a read error; all of those end the collection.
		 */
		if (read_until_newline(fd, line, max_line_length) <= 1) {
			break;
		}

		*lines = xrealloc(*lines, (count+1) * sizeof(char*));
		(*lines)[count] = strdup(line);

		/* A copy that starts with NUL also ends the collection, and is
		 * not included in the count.
		 */
		if ((*lines)[count][0] == 0) {
			break;
		}
		count++;
	}

	return count;
}
|
||||
|
||||
|
||||
/* Report whether fd_in fails an fcntl( F_GETFL ) probe: returns 1 if the
 * probe fails, 0 if it succeeds.  errno is left exactly as the caller
 * had it.
 */
int fd_is_closed( int fd_in )
{
	const int saved_errno = errno;
	const int closed = ( fcntl( fd_in, F_GETFL ) < 0 );

	errno = saved_errno;
	return closed;
}
|
120
src/listen.c
120
src/listen.c
@@ -1,120 +0,0 @@
|
||||
#include "listen.h"
|
||||
#include "serve.h"
|
||||
#include "util.h"
|
||||
#include "flexnbd.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
struct listen * listen_create(
|
||||
struct flexnbd * flexnbd,
|
||||
char* s_ip_address,
|
||||
char* s_rebind_ip_address,
|
||||
char* s_port,
|
||||
char* s_rebind_port,
|
||||
char* s_file,
|
||||
int default_deny,
|
||||
int acl_entries,
|
||||
char** s_acl_entries,
|
||||
int max_nbd_clients )
|
||||
{
|
||||
NULLCHECK( flexnbd );
|
||||
struct listen * listen;
|
||||
|
||||
listen = (struct listen *)xmalloc( sizeof( struct listen ) );
|
||||
listen->flexnbd = flexnbd;
|
||||
listen->init_serve = server_create(
|
||||
flexnbd,
|
||||
s_ip_address,
|
||||
s_port,
|
||||
s_file,
|
||||
default_deny,
|
||||
acl_entries,
|
||||
s_acl_entries,
|
||||
1, 0);
|
||||
listen->main_serve = server_create(
|
||||
flexnbd,
|
||||
s_rebind_ip_address ? s_rebind_ip_address : s_ip_address,
|
||||
s_rebind_port ? s_rebind_port : s_port,
|
||||
s_file,
|
||||
default_deny,
|
||||
acl_entries,
|
||||
s_acl_entries,
|
||||
max_nbd_clients, 1);
|
||||
return listen;
|
||||
}
|
||||
|
||||
|
||||
/* Release the listen object itself.  The servers it points at are not
 * touched here.
 */
void listen_destroy( struct listen * listen )
{
	NULLCHECK( listen );

	free( listen );
}
|
||||
|
||||
|
||||
struct server *listen_switch( struct listen * listen )
|
||||
{
|
||||
NULLCHECK( listen );
|
||||
|
||||
/* TODO: Copy acl from init_serve to main_serve */
|
||||
/* TODO: rename underlying file from foo.INCOMPLETE to foo */
|
||||
|
||||
server_destroy( listen->init_serve );
|
||||
listen->init_serve = NULL;
|
||||
info( "Switched to the main server, serving." );
|
||||
return listen->main_serve;
|
||||
}
|
||||
|
||||
|
||||
void listen_cleanup( struct listen * listen )
|
||||
{
|
||||
NULLCHECK( listen );
|
||||
|
||||
if ( flexnbd_switch_locked( listen->flexnbd ) ) {
|
||||
flexnbd_unlock_switch( listen->flexnbd );
|
||||
}
|
||||
}
|
||||
|
||||
int do_listen( struct listen * listen )
|
||||
{
|
||||
NULLCHECK( listen );
|
||||
|
||||
int have_control = 0;
|
||||
|
||||
flexnbd_lock_switch( listen->flexnbd );
|
||||
{
|
||||
flexnbd_set_server( listen->flexnbd, listen->init_serve );
|
||||
}
|
||||
flexnbd_unlock_switch( listen->flexnbd );
|
||||
|
||||
/* WATCH FOR RACES HERE: flexnbd->serve is set, but the server
|
||||
* isn't running yet and the switch lock is released.
|
||||
*/
|
||||
have_control = do_serve( listen->init_serve );
|
||||
|
||||
|
||||
if( have_control ) {
|
||||
info( "Taking control.");
|
||||
flexnbd_switch( listen->flexnbd, listen_switch );
|
||||
/* WATCH FOR RACES HERE: the server hasn't been
|
||||
* restarted before we release the flexnbd switch lock.
|
||||
* do_serve doesn't return, so there's not a lot of
|
||||
* choice about that.
|
||||
*/
|
||||
do_serve( listen->main_serve );
|
||||
}
|
||||
else {
|
||||
warn("Failed to take control, giving up.");
|
||||
server_destroy( listen->init_serve );
|
||||
listen->init_serve = NULL;
|
||||
}
|
||||
/* TODO: here we must signal the control thread to stop before
|
||||
* it tries to */
|
||||
server_destroy( listen->main_serve );
|
||||
listen->main_serve = NULL;
|
||||
|
||||
debug("Listen done, cleaning up");
|
||||
listen_cleanup( listen );
|
||||
|
||||
return have_control;
|
||||
}
|
||||
|
28
src/listen.h
28
src/listen.h
@@ -1,28 +0,0 @@
|
||||
#ifndef LISTEN_H
|
||||
#define LISTEN_H
|
||||
|
||||
#include "flexnbd.h"
|
||||
#include "serve.h"
|
||||
|
||||
struct listen {
|
||||
struct flexnbd * flexnbd;
|
||||
struct server * init_serve;
|
||||
struct server * main_serve;
|
||||
};
|
||||
|
||||
struct listen * listen_create(
|
||||
struct flexnbd * flexnbd,
|
||||
char* s_ip_address,
|
||||
char* s_rebind_ip_address,
|
||||
char* s_port,
|
||||
char* s_rebind_port,
|
||||
char* s_file,
|
||||
int default_deny,
|
||||
int acl_entries,
|
||||
char** s_acl_entries,
|
||||
int max_nbd_clients );
|
||||
void listen_destroy( struct listen* );
|
||||
|
||||
int do_listen( struct listen * );
|
||||
|
||||
#endif
|
@@ -2,13 +2,16 @@
|
||||
#include "mode.h"
|
||||
|
||||
#include <signal.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
signal(SIGPIPE, SIG_IGN); /* calls to splice() unhelpfully throw this */
|
||||
error_init();
|
||||
|
||||
srand(time(NULL));
|
||||
|
||||
if (argc < 2) {
|
||||
exit_err( help_help_text );
|
||||
}
|
||||
|
614
src/mirror.c
614
src/mirror.c
@@ -1,614 +0,0 @@
|
||||
/* FlexNBD server (C) Bytemark Hosting 2012
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "mirror.h"
|
||||
#include "serve.h"
|
||||
#include "util.h"
|
||||
#include "ioutil.h"
|
||||
#include "parse.h"
|
||||
#include "readwrite.h"
|
||||
#include "bitset.h"
|
||||
#include "self_pipe.h"
|
||||
#include "status.h"
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/un.h>
|
||||
#include <unistd.h>
|
||||
|
||||
struct mirror * mirror_alloc(
|
||||
union mysockaddr * connect_to,
|
||||
union mysockaddr * connect_from,
|
||||
int max_Bps,
|
||||
int action_at_finish,
|
||||
struct mbox * commit_signal)
|
||||
{
|
||||
struct mirror * mirror;
|
||||
|
||||
mirror = xmalloc(sizeof(struct mirror));
|
||||
mirror->connect_to = connect_to;
|
||||
mirror->connect_from = connect_from;
|
||||
mirror->max_bytes_per_second = max_Bps;
|
||||
mirror->action_at_finish = action_at_finish;
|
||||
mirror->commit_signal = commit_signal;
|
||||
mirror->commit_state = MS_UNKNOWN;
|
||||
|
||||
return mirror;
|
||||
}
|
||||
|
||||
void mirror_set_state_f( struct mirror * mirror, enum mirror_state state )
|
||||
{
|
||||
NULLCHECK( mirror );
|
||||
mirror->commit_state = state;
|
||||
}
|
||||
|
||||
/* Log the name of the new state at debug level, then store it via
 * mirror_set_state_f().
 */
#define mirror_set_state( mirror, state ) \
	do { \
		debug( "Mirror state => " #state ); \
		mirror_set_state_f( mirror, state ); \
	} while(0)
|
||||
|
||||
enum mirror_state mirror_get_state( struct mirror * mirror )
|
||||
{
|
||||
NULLCHECK( mirror );
|
||||
return mirror->commit_state;
|
||||
}
|
||||
|
||||
|
||||
void mirror_init( struct mirror * mirror, const char * filename )
|
||||
{
|
||||
int map_fd;
|
||||
off64_t size;
|
||||
|
||||
NULLCHECK( mirror );
|
||||
NULLCHECK( filename );
|
||||
|
||||
FATAL_IF_NEGATIVE(
|
||||
open_and_mmap(
|
||||
filename,
|
||||
&map_fd,
|
||||
&size,
|
||||
(void**) &mirror->mapped
|
||||
),
|
||||
"Failed to open and mmap %s",
|
||||
filename
|
||||
);
|
||||
|
||||
mirror->dirty_map = bitset_alloc(size, 4096);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Call this before a mirror attempt. */
|
||||
void mirror_reset( struct mirror * mirror )
|
||||
{
|
||||
NULLCHECK( mirror );
|
||||
NULLCHECK( mirror->dirty_map );
|
||||
mirror_set_state( mirror, MS_INIT );
|
||||
bitset_set(mirror->dirty_map);
|
||||
}
|
||||
|
||||
|
||||
/* Build a ready-to-run mirror for filename: allocate it with the given
 * settings, map the file and create the dirty map (mirror_init), then
 * reset it for a first attempt (mirror_reset).
 */
struct mirror * mirror_create(
		const char * filename,
		union mysockaddr * connect_to,
		union mysockaddr * connect_from,
		int max_Bps,
		int action_at_finish,
		struct mbox * commit_signal)
{
	/* FIXME: shouldn't map_fd get closed? */
	struct mirror * created = mirror_alloc(
			connect_to,
			connect_from,
			max_Bps,
			action_at_finish,
			commit_signal);

	mirror_init( created, filename );
	mirror_reset( created );

	return created;
}
|
||||
|
||||
|
||||
void mirror_destroy( struct mirror *mirror )
|
||||
{
|
||||
NULLCHECK( mirror );
|
||||
free(mirror->connect_to);
|
||||
free(mirror->connect_from);
|
||||
free(mirror->dirty_map);
|
||||
free(mirror);
|
||||
}
|
||||
|
||||
|
||||
/** The mirror code will split NBD writes, making them this long as a maximum */
|
||||
static const int mirror_longest_write = 8<<20;
|
||||
|
||||
/** If, during a mirror pass, we have sent this number of bytes or fewer, we
|
||||
* go to freeze the I/O and finish it off. This is just a guess.
|
||||
*/
|
||||
static const unsigned int mirror_last_pass_after_bytes_written = 100<<20;
|
||||
|
||||
/** The largest number of full passes we'll do - the last one will always
|
||||
* cause the I/O to freeze, however many bytes are left to copy.
|
||||
*/
|
||||
static const int mirror_maximum_passes = 7;
|
||||
|
||||
/* A single mirror pass over the disc, optionally locking IO around the
|
||||
* transfer.
|
||||
*/
|
||||
int mirror_pass(struct server * serve, int should_lock, uint64_t *written)
|
||||
{
|
||||
uint64_t current = 0;
|
||||
int success = 1;
|
||||
struct bitset_mapping *map = serve->mirror->dirty_map;
|
||||
*written = 0;
|
||||
|
||||
while (current < serve->size) {
|
||||
int run = bitset_run_count(map, current, mirror_longest_write);
|
||||
|
||||
debug("mirror current=%ld, run=%d", current, run);
|
||||
|
||||
/* FIXME: we could avoid sending sparse areas of the
|
||||
* disc here, and probably save a lot of bandwidth and
|
||||
* time (if we know the destination starts off zeroed).
|
||||
*/
|
||||
if (bitset_is_set_at(map, current)) {
|
||||
/* We've found a dirty area, send it */
|
||||
debug("^^^ writing");
|
||||
|
||||
/* We need to stop the main thread from working
|
||||
* because it might corrupt the dirty map. This
|
||||
* is likely to slow things down but will be
|
||||
* safe.
|
||||
*/
|
||||
if (should_lock) { server_lock_io( serve ); }
|
||||
{
|
||||
debug("in lock block");
|
||||
/** FIXME: do something useful with bytes/second */
|
||||
|
||||
/** FIXME: error handling code here won't unlock */
|
||||
socket_nbd_write( serve->mirror->client,
|
||||
current,
|
||||
run,
|
||||
0,
|
||||
serve->mirror->mapped + current,
|
||||
MS_REQUEST_LIMIT_SECS);
|
||||
|
||||
/* now mark it clean */
|
||||
bitset_clear_range(map, current, run);
|
||||
debug("leaving lock block");
|
||||
}
|
||||
if (should_lock) { server_unlock_io( serve ); }
|
||||
|
||||
*written += run;
|
||||
}
|
||||
current += run;
|
||||
|
||||
if (serve->mirror->signal_abandon) {
|
||||
debug("Abandon message received" );
|
||||
success = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
|
||||
void mirror_give_control( struct mirror * mirror )
|
||||
{
|
||||
debug( "mirror: entrusting and disconnecting" );
|
||||
/* TODO: set up an error handler to clean up properly on ERROR.
|
||||
*/
|
||||
|
||||
/* A transfer of control is expressed as a 3-way handshake.
|
||||
* First, We send a REQUEST_ENTRUST. If this fails to be
|
||||
* received, this thread will simply block until the server is
|
||||
* restarted. If the remote end doesn't understand it, it'll
|
||||
* disconnect us, and an ERROR *should* bomb this thread.
|
||||
* FIXME: make the ERROR work.
|
||||
* If we get an explicit error back from the remote end, then
|
||||
* again, this thread will bomb out.
|
||||
* On receiving a valid response, we send a REQUEST_DISCONNECT,
|
||||
* and we quit without checking for a response. This is the
|
||||
* remote server's signal to assume control of the file. The
|
||||
* reason we don't check for a response is the state we end up
|
||||
* in if the final message goes astray: if we lose the
|
||||
* REQUEST_DISCONNECT, the sender has quit and the receiver
|
||||
* hasn't had a signal to take over yet, so the data is safe.
|
||||
* If we were to wait for a response to the REQUEST_DISCONNECT,
|
||||
* the sender and receiver would *both* be servicing write
|
||||
* requests while the response was in flight, and if the
|
||||
* response went astray we'd have two servers claiming
|
||||
* responsibility for the same data.
|
||||
*
|
||||
* The meaning of these is as follows:
|
||||
* The entrust signifies that all the data has been sent, and
|
||||
* the client is currently paused but not disconnected.
|
||||
* The disconnect signifies that the client has been
|
||||
* safely prevented from making any more writes.
|
||||
*
|
||||
* Since we lock io and close the server it in mirror_on_exit before
|
||||
* releasing, we don't actually need to take any action between the
|
||||
* two here.
|
||||
*/
|
||||
socket_nbd_entrust( mirror->client );
|
||||
socket_nbd_disconnect( mirror->client );
|
||||
}
|
||||
|
||||
|
||||
/* THIS FUNCTION MUST ONLY BE CALLED WITH THE SERVER'S IO LOCKED. */
|
||||
void mirror_on_exit( struct server * serve )
|
||||
{
|
||||
/* Send an explicit entrust and disconnect. After this
|
||||
* point we cannot allow any reads or writes to the local file.
|
||||
* We do this *before* trying to shut down the server so that if
|
||||
* the transfer of control fails, we haven't stopped the server
|
||||
* and already-connected clients don't get needlessly
|
||||
* disconnected.
|
||||
*/
|
||||
debug( "mirror_give_control");
|
||||
mirror_give_control( serve->mirror );
|
||||
|
||||
/* If we're still here, the transfer of control went ok, and the
|
||||
* remote is listening (or will be shortly). We can shut the
|
||||
* server down.
|
||||
*
|
||||
* It doesn't matter if we get new client connections before
|
||||
* now, the IO lock will stop them from doing anything.
|
||||
*/
|
||||
debug("serve_signal_close");
|
||||
serve_signal_close( serve );
|
||||
|
||||
/* We have to wait until the server is closed before unlocking
|
||||
* IO. This is because the client threads check to see if the
|
||||
* server is still open before reading or writing inside their
|
||||
* own locks. If we don't wait for the close, there's no way to
|
||||
* guarantee the server thread will win the race and we risk the
|
||||
* clients seeing a "successful" write to a dead disc image.
|
||||
*/
|
||||
debug("serve_wait_for_close");
|
||||
serve_wait_for_close( serve );
|
||||
info("Mirror sent.");
|
||||
}
|
||||
|
||||
|
||||
void mirror_cleanup( struct server * serve,
|
||||
int fatal __attribute__((unused)))
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
struct mirror * mirror = serve->mirror;
|
||||
NULLCHECK( mirror );
|
||||
info( "Cleaning up mirror thread");
|
||||
|
||||
if( mirror->client && mirror->client > 0 ){
|
||||
close( mirror->client );
|
||||
}
|
||||
mirror->client = -1;
|
||||
|
||||
if( server_io_locked( serve ) ){ server_unlock_io( serve ); }
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Connect to the mirror destination and validate its NBD hello.
 *
 * The connection goes to mirror->connect_to, bound from
 * mirror->connect_from when that is set.  The hello must arrive within
 * MS_HELLO_TIME_SECS and advertise a size equal to local_size.
 *
 * Returns 1 with the state set to MS_GO on success.  Returns 0 otherwise,
 * with the state set to the matching MS_FAIL_* value; in that case any
 * socket that was opened is closed and mirror->client is set to -1 so a
 * later cleanup cannot close the same descriptor a second time.
 */
int mirror_connect( struct mirror * mirror, off64_t local_size )
{
	struct sockaddr * connect_from = NULL;
	int connected = 0;

	if ( mirror->connect_from ) {
		connect_from = &mirror->connect_from->generic;
	}

	NULLCHECK( mirror->connect_to );

	mirror->client = socket_connect(&mirror->connect_to->generic, connect_from);
	if ( 0 < mirror->client ) {
		fd_set fds;
		struct timeval tv = { MS_HELLO_TIME_SECS, 0};
		FD_ZERO( &fds );
		FD_SET( mirror->client, &fds );

		FATAL_UNLESS( 0 <= select( FD_SETSIZE, &fds, NULL, NULL, &tv ),
				"Select failed." );

		if( FD_ISSET( mirror->client, &fds ) ){
			off64_t remote_size;
			if ( socket_nbd_read_hello( mirror->client, &remote_size ) ) {
				if( remote_size == local_size ){
					connected = 1;
					mirror_set_state( mirror, MS_GO );
				}
				else {
					/* off64_t is wider than int: print it as long long */
					warn("Remote size (%lld) doesn't match local (%lld)",
							(long long) remote_size, (long long) local_size );
					mirror_set_state( mirror, MS_FAIL_SIZE_MISMATCH );
				}
			}
			else {
				warn( "Mirror attempt rejected." );
				mirror_set_state( mirror, MS_FAIL_REJECTED );
			}
		}
		else {
			warn( "No NBD Hello received." );
			mirror_set_state( mirror, MS_FAIL_NO_HELLO );
		}

		if ( !connected ) {
			close( mirror->client );
			/* Don't leave a stale descriptor behind for cleanup code */
			mirror->client = -1;
		}
	}
	else {
		warn( "Mirror failed to connect.");
		mirror_set_state( mirror, MS_FAIL_CONNECT );
	}

	return connected;
}
|
||||
|
||||
|
||||
|
||||
void mirror_run( struct server *serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
NULLCHECK( serve->mirror );
|
||||
|
||||
int pass;
|
||||
uint64_t written;
|
||||
|
||||
info("Starting mirror" );
|
||||
for (pass=0; pass < mirror_maximum_passes-1; pass++) {
|
||||
|
||||
debug("mirror start pass=%d", pass);
|
||||
if ( !mirror_pass( serve, 1, &written ) ){
|
||||
debug("Failed mirror pass state is %d", mirror_get_state( serve->mirror ) );
|
||||
debug("pass failed, giving up");
|
||||
return; }
|
||||
|
||||
/* if we've not written anything */
|
||||
if (written < mirror_last_pass_after_bytes_written) { break; }
|
||||
}
|
||||
|
||||
server_lock_io( serve );
|
||||
{
|
||||
if ( mirror_pass( serve, 0, &written ) &&
|
||||
ACTION_EXIT == serve->mirror->action_at_finish) {
|
||||
debug("exit!");
|
||||
mirror_on_exit( serve );
|
||||
info("Server closed, quitting "
|
||||
"after successful migration");
|
||||
}
|
||||
}
|
||||
server_unlock_io( serve );
|
||||
}
|
||||
|
||||
|
||||
void mbox_post_mirror_state( struct mbox * mbox, enum mirror_state st )
|
||||
{
|
||||
NULLCHECK( mbox );
|
||||
enum mirror_state * contents = xmalloc( sizeof( enum mirror_state ) );
|
||||
|
||||
*contents = st;
|
||||
|
||||
mbox_post( mbox, contents );
|
||||
}
|
||||
|
||||
|
||||
void mirror_signal_commit( struct mirror * mirror )
|
||||
{
|
||||
NULLCHECK( mirror );
|
||||
|
||||
mbox_post_mirror_state( mirror->commit_signal,
|
||||
mirror_get_state( mirror ) );
|
||||
}
|
||||
|
||||
/** Thread launched to drive mirror process
|
||||
* This is needed for two reasons: firstly, it decouples the mirroring
|
||||
* from the control thread (although that's less valid with mboxes
|
||||
* passing state back and forth) and to provide an error context so that
|
||||
* retries can be cleanly handled without a bespoke error handling
|
||||
* mechanism.
|
||||
* */
|
||||
void* mirror_runner(void* serve_params_uncast)
|
||||
{
|
||||
/* The supervisor thread relies on there not being any ERROR
|
||||
* calls until after the mirror_signal_commit() call in this
|
||||
* function.
|
||||
* However, *after* that, we should call ERROR_* instead of
|
||||
* FATAL_* wherever possible.
|
||||
*/
|
||||
struct server *serve = (struct server*) serve_params_uncast;
|
||||
|
||||
NULLCHECK( serve );
|
||||
NULLCHECK( serve->mirror );
|
||||
struct mirror * mirror = serve->mirror;
|
||||
NULLCHECK( mirror->dirty_map );
|
||||
|
||||
error_set_handler( (cleanup_handler *) mirror_cleanup, serve );
|
||||
|
||||
info( "Connecting to mirror" );
|
||||
|
||||
time_t start_time = time(NULL);
|
||||
int connected = mirror_connect( mirror, serve->size );
|
||||
mirror_signal_commit( mirror );
|
||||
if ( !connected ) { goto abandon_mirror; }
|
||||
|
||||
/* After this point, if we see a failure we need to disconnect
|
||||
* and retry everything from mirror_set_state(_, MS_INIT), but
|
||||
* *without* signaling the commit or abandoning the mirror.
|
||||
* */
|
||||
|
||||
if ( (time(NULL) - start_time) > MS_CONNECT_TIME_SECS ){
|
||||
/* If we get here, then we managed to connect but the
|
||||
* control thread feeding status back to the user will
|
||||
* have gone away, leaving the user without meaningful
|
||||
* feedback. In this instance, they have to assume a
|
||||
* failure, so we can't afford to let the mirror happen.
|
||||
* We have to set the state to avoid a race.
|
||||
*/
|
||||
mirror_set_state( mirror, MS_FAIL_CONNECT );
|
||||
warn( "Mirror connected, but too slowly" );
|
||||
goto abandon_mirror;
|
||||
}
|
||||
|
||||
mirror_run( serve );
|
||||
|
||||
mirror_set_state( mirror, MS_DONE );
|
||||
abandon_mirror:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
struct mirror_super * mirror_super_create(
|
||||
const char * filename,
|
||||
union mysockaddr * connect_to,
|
||||
union mysockaddr * connect_from,
|
||||
int max_Bps,
|
||||
int action_at_finish,
|
||||
struct mbox * state_mbox)
|
||||
{
|
||||
struct mirror_super * super = xmalloc( sizeof( struct mirror_super) );
|
||||
super->mirror = mirror_create(
|
||||
filename,
|
||||
connect_to,
|
||||
connect_from,
|
||||
max_Bps,
|
||||
action_at_finish,
|
||||
mbox_create() ) ;
|
||||
super->state_mbox = state_mbox;
|
||||
return super;
|
||||
}
|
||||
|
||||
|
||||
/* Post the current state of the mirror into super->state_mbox.*/
|
||||
void mirror_super_signal_committed(
|
||||
struct mirror_super * super ,
|
||||
enum mirror_state commit_state )
|
||||
{
|
||||
NULLCHECK( super );
|
||||
NULLCHECK( super->state_mbox );
|
||||
|
||||
mbox_post_mirror_state(
|
||||
super->state_mbox,
|
||||
commit_state );
|
||||
}
|
||||
|
||||
|
||||
void mirror_super_destroy( struct mirror_super * super )
|
||||
{
|
||||
NULLCHECK( super );
|
||||
|
||||
mbox_destroy( super->mirror->commit_signal );
|
||||
mirror_destroy( super->mirror );
|
||||
free( super );
|
||||
}
|
||||
|
||||
|
||||
/* The mirror supervisor thread. Responsible for kicking off retries if
|
||||
* the mirror thread fails.
|
||||
* The mirror and mirror_super objects are never freed, and the
|
||||
* mirror_super_runner thread is never joined.
|
||||
*/
|
||||
void * mirror_super_runner( void * serve_uncast )
|
||||
{
|
||||
struct server * serve = (struct server *) serve_uncast;
|
||||
NULLCHECK( serve );
|
||||
NULLCHECK( serve->mirror );
|
||||
NULLCHECK( serve->mirror_super );
|
||||
|
||||
int first_pass = 1;
|
||||
int should_retry = 0;
|
||||
int success = 0;
|
||||
|
||||
struct mirror * mirror = serve->mirror;
|
||||
struct mirror_super * super = serve->mirror_super;
|
||||
|
||||
do {
|
||||
FATAL_IF( 0 != pthread_create(
|
||||
&mirror->thread,
|
||||
NULL,
|
||||
mirror_runner,
|
||||
serve),
|
||||
"Failed to create mirror thread");
|
||||
|
||||
debug("Supervisor waiting for commit signal");
|
||||
enum mirror_state * commit_state =
|
||||
mbox_receive( mirror->commit_signal );
|
||||
|
||||
debug( "Supervisor got commit signal" );
|
||||
if ( first_pass ) {
|
||||
/* Only retry if the connection attempt was
|
||||
* successful. Otherwise the user will see an
|
||||
* error reported while we're still trying to
|
||||
* retry behind the scenes.
|
||||
*/
|
||||
should_retry = *commit_state == MS_GO;
|
||||
/* Only send this signal the first time */
|
||||
mirror_super_signal_committed(
|
||||
super,
|
||||
*commit_state);
|
||||
debug("Mirror supervisor committed");
|
||||
}
|
||||
/* We only care about the value of the commit signal on
|
||||
* the first pass, so this is ok
|
||||
*/
|
||||
free( commit_state );
|
||||
|
||||
debug("Supervisor waiting for mirror thread" );
|
||||
pthread_join( mirror->thread, NULL );
|
||||
|
||||
success = MS_DONE == mirror_get_state( mirror );
|
||||
|
||||
if( success ){
|
||||
info( "Mirror supervisor success, exiting" ); }
|
||||
else if ( mirror->signal_abandon ) {
|
||||
info( "Mirror abandoned" );
|
||||
should_retry = 0;
|
||||
}
|
||||
else if (should_retry){
|
||||
info( "Mirror failed, retrying" );
|
||||
}
|
||||
else { info( "Mirror failed before commit, giving up" ); }
|
||||
|
||||
first_pass = 0;
|
||||
|
||||
if ( should_retry ) {
|
||||
/* We don't want to hammer the destination too
|
||||
* hard, so if this is a retry, insert a delay. */
|
||||
sleep( MS_RETRY_DELAY_SECS );
|
||||
|
||||
/* We also have to reset the bitmap to be sure
|
||||
* we transfer everything */
|
||||
mirror_reset( mirror );
|
||||
}
|
||||
|
||||
}
|
||||
while ( should_retry && !success );
|
||||
|
||||
serve->mirror = NULL;
|
||||
serve->mirror_super = NULL;
|
||||
|
||||
mirror_super_destroy( super );
|
||||
debug( "Mirror supervisor done." );
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
171
src/proxy-main.c
Normal file
171
src/proxy-main.c
Normal file
@@ -0,0 +1,171 @@
|
||||
#include <signal.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "mode.h"
|
||||
#include "util.h"
|
||||
#include "proxy.h"
|
||||
|
||||
|
||||
static struct option proxy_options[] = {
|
||||
GETOPT_HELP,
|
||||
GETOPT_ADDR,
|
||||
GETOPT_PORT,
|
||||
GETOPT_CONNECT_ADDR,
|
||||
GETOPT_CONNECT_PORT,
|
||||
GETOPT_BIND,
|
||||
GETOPT_CACHE,
|
||||
GETOPT_QUIET,
|
||||
GETOPT_VERBOSE,
|
||||
{0}
|
||||
};
|
||||
static char proxy_short_options[] = "hl:p:C:P:b:" SOPT_QUIET SOPT_VERBOSE;
|
||||
static char proxy_help_text[] =
|
||||
"Usage: flexnbd-proxy <options>\n\n"
|
||||
"Resiliently proxy an NBD connection between client and server\n"
|
||||
"We can listen on TCP or UNIX socket, but only connect to TCP servers.\n\n"
|
||||
HELP_LINE
|
||||
"\t--" OPT_ADDR ",-l <ADDR>\tThe address we will bind to as a proxy.\n"
|
||||
"\t--" OPT_PORT ",-p <PORT>\tThe port we will bind to as a proxy, if required.\n"
|
||||
"\t--" OPT_CONNECT_ADDR ",-C <ADDR>\tAddress of the proxied server.\n"
|
||||
"\t--" OPT_CONNECT_PORT ",-P <PORT>\tPort of the proxied server.\n"
|
||||
"\t--" OPT_BIND ",-b <ADDR>\tThe address we connect from, as a proxy.\n"
|
||||
"\t--" OPT_CACHE ",-c[=<CACHE-BYTES>]\tUse a RAM read cache of the given size.\n"
|
||||
QUIET_LINE
|
||||
VERBOSE_LINE;
|
||||
|
||||
static char proxy_default_cache_size[] = "4096";
|
||||
|
||||
void read_proxy_param(
|
||||
int c,
|
||||
char **downstream_addr,
|
||||
char **downstream_port,
|
||||
char **upstream_addr,
|
||||
char **upstream_port,
|
||||
char **bind_addr,
|
||||
char **cache_bytes)
|
||||
{
|
||||
switch( c ) {
|
||||
case 'h' :
|
||||
fprintf( stdout, "%s\n", proxy_help_text );
|
||||
exit( 0 );
|
||||
case 'l':
|
||||
*downstream_addr = optarg;
|
||||
break;
|
||||
case 'p':
|
||||
*downstream_port = optarg;
|
||||
break;
|
||||
case 'C':
|
||||
*upstream_addr = optarg;
|
||||
break;
|
||||
case 'P':
|
||||
*upstream_port = optarg;
|
||||
break;
|
||||
case 'b':
|
||||
*bind_addr = optarg;
|
||||
break;
|
||||
case 'c':
|
||||
*cache_bytes = optarg ? optarg : proxy_default_cache_size;
|
||||
break;
|
||||
case 'q':
|
||||
log_level = QUIET_LOG_LEVEL;
|
||||
break;
|
||||
case 'v':
|
||||
log_level = VERBOSE_LOG_LEVEL;
|
||||
break;
|
||||
default:
|
||||
exit_err( proxy_help_text );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* The proxy instance; NULL until main() has created it.  Kept global so the
 * signal handler below can reach it. */
struct proxier *proxy = NULL;

/* Handler for SIGTERM/SIGQUIT/SIGINT (installed by main()): logs the signal,
 * cleans up the proxy if one exists, and exits with status 0.
 *
 * NOTE(review): exit() is not on the POSIX async-signal-safe list, and the
 * logging/cleanup calls may not be safe in a handler either - confirm. */
void my_exit(int signum)
{
	info("Exit signalled (%i)", signum);

	if (proxy != NULL) {
		proxy_cleanup(proxy);
	}

	exit(0);
}
|
||||
|
||||
int main( int argc, char *argv[] )
|
||||
{
|
||||
int c;
|
||||
char *downstream_addr = NULL;
|
||||
char *downstream_port = NULL;
|
||||
char *upstream_addr = NULL;
|
||||
char *upstream_port = NULL;
|
||||
char *bind_addr = NULL;
|
||||
char *cache_bytes = NULL;
|
||||
int success;
|
||||
|
||||
sigset_t mask;
|
||||
struct sigaction exit_action;
|
||||
|
||||
sigemptyset( &mask );
|
||||
sigaddset( &mask, SIGTERM );
|
||||
sigaddset( &mask, SIGQUIT );
|
||||
sigaddset( &mask, SIGINT );
|
||||
|
||||
exit_action.sa_handler = my_exit;
|
||||
exit_action.sa_mask = mask;
|
||||
exit_action.sa_flags = 0;
|
||||
|
||||
srand(time(NULL));
|
||||
|
||||
while (1) {
|
||||
c = getopt_long( argc, argv, proxy_short_options, proxy_options, NULL );
|
||||
if ( -1 == c ) { break; }
|
||||
read_proxy_param( c,
|
||||
&downstream_addr,
|
||||
&downstream_port,
|
||||
&upstream_addr,
|
||||
&upstream_port,
|
||||
&bind_addr,
|
||||
&cache_bytes
|
||||
);
|
||||
}
|
||||
|
||||
if ( NULL == downstream_addr ){
|
||||
fprintf( stderr, "--addr is required.\n" );
|
||||
exit_err( proxy_help_text );
|
||||
} else if ( NULL == upstream_addr || NULL == upstream_port ){
|
||||
fprintf( stderr, "both --conn-addr and --conn-port are required.\n" );
|
||||
exit_err( proxy_help_text );
|
||||
}
|
||||
|
||||
proxy = proxy_create(
|
||||
downstream_addr,
|
||||
downstream_port,
|
||||
upstream_addr,
|
||||
upstream_port,
|
||||
bind_addr,
|
||||
cache_bytes
|
||||
);
|
||||
|
||||
/* Set these *after* proxy has been assigned to */
|
||||
sigaction(SIGTERM, &exit_action, NULL);
|
||||
sigaction(SIGQUIT, &exit_action, NULL);
|
||||
sigaction(SIGINT, &exit_action, NULL);
|
||||
signal(SIGPIPE, SIG_IGN); /* calls to splice() unhelpfully throw this */
|
||||
|
||||
if ( NULL != downstream_port ) {
|
||||
info(
|
||||
"Proxying between %s %s (downstream) and %s %s (upstream)",
|
||||
downstream_addr, downstream_port, upstream_addr, upstream_port
|
||||
);
|
||||
} else {
|
||||
info(
|
||||
"Proxying between %s (downstream) and %s %s (upstream)",
|
||||
downstream_addr, upstream_addr, upstream_port
|
||||
);
|
||||
}
|
||||
|
||||
success = do_proxy( proxy );
|
||||
proxy_destroy( proxy );
|
||||
|
||||
return success ? 0 : 1;
|
||||
}
|
||||
|
68
src/proxy/prefetch.c
Normal file
68
src/proxy/prefetch.c
Normal file
@@ -0,0 +1,68 @@
|
||||
#include "prefetch.h"
|
||||
#include "util.h"
|
||||
|
||||
|
||||
struct prefetch* prefetch_create( size_t size_bytes ){
|
||||
|
||||
struct prefetch* out = xmalloc( sizeof( struct prefetch ) );
|
||||
NULLCHECK( out );
|
||||
|
||||
out->buffer = xmalloc( size_bytes );
|
||||
NULLCHECK( out->buffer );
|
||||
|
||||
out->size = size_bytes;
|
||||
out->is_full = 0;
|
||||
out->from = 0;
|
||||
out->len = 0;
|
||||
|
||||
return out;
|
||||
|
||||
}
|
||||
|
||||
void prefetch_destroy( struct prefetch *prefetch ) {
|
||||
if( prefetch ) {
|
||||
free( prefetch->buffer );
|
||||
free( prefetch );
|
||||
}
|
||||
}
|
||||
|
||||
size_t prefetch_size( struct prefetch *prefetch){
|
||||
if ( prefetch ) {
|
||||
return prefetch->size;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Mark the cache as holding no data (no-op when prefetch is NULL). */
void prefetch_set_is_empty(struct prefetch *prefetch)
{
	prefetch_set_full(prefetch, 0);
}
|
||||
|
||||
/* Mark the cache as holding data (no-op when prefetch is NULL). */
void prefetch_set_is_full(struct prefetch *prefetch)
{
	prefetch_set_full(prefetch, 1);
}
|
||||
|
||||
void prefetch_set_full( struct prefetch *prefetch, int val ){
|
||||
if( prefetch ) {
|
||||
prefetch->is_full = val;
|
||||
}
|
||||
}
|
||||
|
||||
int prefetch_is_full( struct prefetch *prefetch ){
|
||||
if( prefetch ) {
|
||||
return prefetch->is_full;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int prefetch_contains( struct prefetch *prefetch, uint64_t from, uint32_t len ){
|
||||
NULLCHECK( prefetch );
|
||||
return from >= prefetch->from &&
|
||||
from + len <= prefetch->from + prefetch->len;
|
||||
}
|
||||
|
||||
char *prefetch_offset( struct prefetch *prefetch, uint64_t from ){
|
||||
NULLCHECK( prefetch );
|
||||
return prefetch->buffer + (from - prefetch->from);
|
||||
}
|
33
src/proxy/prefetch.h
Normal file
33
src/proxy/prefetch.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#ifndef PREFETCH_H
#define PREFETCH_H

#include <stdint.h>
#include <stddef.h>

#define PREFETCH_BUFSIZE 4096

/* A single-range read cache. */
struct prefetch {
	/* Non-zero when the buffer currently holds data. */
	int is_full;

	/* Offset at which the buffered data starts. */
	uint64_t from;

	/* Number of bytes of buffered data. */
	uint32_t len;

	/* Capacity of buffer, in bytes. */
	size_t size;

	/* Backing storage; allocated by prefetch_create(). */
	char *buffer;
};

/* Lifecycle */
struct prefetch *prefetch_create(size_t size_bytes);
void prefetch_destroy(struct prefetch *prefetch);

/* Capacity in bytes (0 for a NULL cache) */
size_t prefetch_size(struct prefetch *);

/* is_full flag accessors */
void prefetch_set_is_empty(struct prefetch *prefetch);
void prefetch_set_is_full(struct prefetch *prefetch);
void prefetch_set_full(struct prefetch *prefetch, int val);
int prefetch_is_full(struct prefetch *prefetch);

/* Range queries against the buffered data */
int prefetch_contains(struct prefetch *prefetch, uint64_t from, uint32_t len);
char *prefetch_offset(struct prefetch *prefetch, uint64_t from);

#endif
|
975
src/proxy/proxy.c
Normal file
975
src/proxy/proxy.c
Normal file
@@ -0,0 +1,975 @@
|
||||
#include "proxy.h"
|
||||
#include "readwrite.h"
|
||||
|
||||
#include "prefetch.h"
|
||||
|
||||
|
||||
#include "ioutil.h"
|
||||
#include "sockutil.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/tcp.h>
|
||||
|
||||
struct proxier* proxy_create(
|
||||
char* s_downstream_address,
|
||||
char* s_downstream_port,
|
||||
char* s_upstream_address,
|
||||
char* s_upstream_port,
|
||||
char* s_upstream_bind,
|
||||
char* s_cache_bytes )
|
||||
{
|
||||
struct proxier* out;
|
||||
out = xmalloc( sizeof( struct proxier ) );
|
||||
|
||||
FATAL_IF_NULL(s_downstream_address, "Listen address not specified");
|
||||
NULLCHECK( s_downstream_address );
|
||||
|
||||
FATAL_UNLESS(
|
||||
parse_to_sockaddr( &out->listen_on.generic, s_downstream_address ),
|
||||
"Couldn't parse downstream address %s"
|
||||
);
|
||||
|
||||
if ( out->listen_on.family != AF_UNIX ) {
|
||||
FATAL_IF_NULL( s_downstream_port, "Downstream port not specified" );
|
||||
NULLCHECK( s_downstream_port );
|
||||
parse_port( s_downstream_port, &out->listen_on.v4 );
|
||||
}
|
||||
|
||||
FATAL_IF_NULL(s_upstream_address, "Upstream address not specified");
|
||||
NULLCHECK( s_upstream_address );
|
||||
|
||||
FATAL_UNLESS(
|
||||
parse_ip_to_sockaddr( &out->connect_to.generic, s_upstream_address ),
|
||||
"Couldn't parse upstream address '%s'",
|
||||
s_upstream_address
|
||||
);
|
||||
|
||||
FATAL_IF_NULL( s_upstream_port, "Upstream port not specified" );
|
||||
NULLCHECK( s_upstream_port );
|
||||
parse_port( s_upstream_port, &out->connect_to.v4 );
|
||||
|
||||
if ( s_upstream_bind ) {
|
||||
FATAL_IF_ZERO(
|
||||
parse_ip_to_sockaddr( &out->connect_from.generic, s_upstream_bind ),
|
||||
"Couldn't parse bind address '%s'",
|
||||
s_upstream_bind
|
||||
);
|
||||
out->bind = 1;
|
||||
}
|
||||
|
||||
out->listen_fd = -1;
|
||||
out->downstream_fd = -1;
|
||||
out->upstream_fd = -1;
|
||||
|
||||
out->prefetch = NULL;
|
||||
if ( s_cache_bytes ){
|
||||
int cache_bytes = atoi( s_cache_bytes );
|
||||
/* leaving this off or setting a cache size of zero or
|
||||
* less results in no cache.
|
||||
*/
|
||||
if ( cache_bytes >= 0 ) {
|
||||
out->prefetch = prefetch_create( cache_bytes );
|
||||
}
|
||||
}
|
||||
|
||||
out->init.buf = xmalloc( sizeof( struct nbd_init_raw ) );
|
||||
out->req.buf = xmalloc( NBD_MAX_SIZE );
|
||||
out->rsp.buf = xmalloc( NBD_MAX_SIZE );
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
int proxy_prefetches( struct proxier* proxy ) {
|
||||
NULLCHECK( proxy );
|
||||
return proxy->prefetch != NULL;
|
||||
}
|
||||
|
||||
int proxy_prefetch_bufsize( struct proxier* proxy ){
|
||||
NULLCHECK( proxy );
|
||||
return prefetch_size( proxy->prefetch );
|
||||
}
|
||||
|
||||
void proxy_destroy( struct proxier* proxy )
|
||||
{
|
||||
free( proxy->init.buf );
|
||||
free( proxy->req.buf );
|
||||
free( proxy->rsp.buf );
|
||||
prefetch_destroy( proxy->prefetch );
|
||||
|
||||
free( proxy );
|
||||
}
|
||||
|
||||
/* Shared between our two different connect_to_upstream paths */
|
||||
void proxy_finish_connect_to_upstream( struct proxier *proxy, uint64_t size );
|
||||
|
||||
/* Try to establish a connection to our upstream server. Return 1 on success,
|
||||
* 0 on failure. this is a blocking call that returns a non-blocking socket.
|
||||
*/
|
||||
int proxy_connect_to_upstream( struct proxier* proxy )
|
||||
{
|
||||
struct sockaddr* connect_from = NULL;
|
||||
if ( proxy->bind ) {
|
||||
connect_from = &proxy->connect_from.generic;
|
||||
}
|
||||
|
||||
int fd = socket_connect( &proxy->connect_to.generic, connect_from );
|
||||
uint64_t size = 0;
|
||||
|
||||
if ( -1 == fd ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if( !socket_nbd_read_hello( fd, &size ) ) {
|
||||
WARN_IF_NEGATIVE(
|
||||
sock_try_close( fd ),
|
||||
"Couldn't close() after failed read of NBD hello on fd %i", fd
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
|
||||
proxy->upstream_fd = fd;
|
||||
sock_set_nonblock( fd, 1 );
|
||||
proxy_finish_connect_to_upstream( proxy, size );
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* First half of non-blocking connection to upstream. Gets as far as calling
|
||||
* connect() on a non-blocking socket.
|
||||
*/
|
||||
void proxy_start_connect_to_upstream( struct proxier* proxy )
|
||||
{
|
||||
int fd, result;
|
||||
struct sockaddr* from = NULL;
|
||||
struct sockaddr* to = &proxy->connect_to.generic;
|
||||
|
||||
if ( proxy->bind ) {
|
||||
from = &proxy->connect_from.generic;
|
||||
}
|
||||
|
||||
fd = socket( to->sa_family , SOCK_STREAM, 0 );
|
||||
|
||||
if( fd < 0 ) {
|
||||
warn( SHOW_ERRNO( "Couldn't create socket to reconnect to upstream" ) );
|
||||
return;
|
||||
}
|
||||
|
||||
info( "Beginning non-blocking connection to upstream on fd %i", fd );
|
||||
|
||||
if ( NULL != from ) {
|
||||
if ( 0 > bind( fd, from, sockaddr_size( from ) ) ) {
|
||||
warn( SHOW_ERRNO( "bind() to source address failed" ) );
|
||||
}
|
||||
}
|
||||
|
||||
result = sock_set_nonblock( fd, 1 );
|
||||
if ( result == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to set upstream fd %i non-blocking", fd ) );
|
||||
goto error;
|
||||
}
|
||||
|
||||
result = connect( fd, to, sockaddr_size( to ) );
|
||||
if ( result == -1 && errno != EINPROGRESS ) {
|
||||
warn( SHOW_ERRNO( "Failed to start connect()ing to upstream!" ) );
|
||||
goto error;
|
||||
}
|
||||
|
||||
proxy->upstream_fd = fd;
|
||||
return;
|
||||
|
||||
error:
|
||||
if ( sock_try_close( fd ) == -1 ) {
|
||||
/* Non-fatal leak, although still nasty */
|
||||
warn( SHOW_ERRNO( "Failed to close fd for upstream %i", fd ) );
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void proxy_finish_connect_to_upstream( struct proxier *proxy, uint64_t size ) {
|
||||
|
||||
if ( proxy->upstream_size == 0 ) {
|
||||
info( "Size of upstream image is %"PRIu64" bytes", size );
|
||||
} else if ( proxy->upstream_size != size ) {
|
||||
warn(
|
||||
"Size changed from %"PRIu64" to %"PRIu64" bytes",
|
||||
proxy->upstream_size, size
|
||||
);
|
||||
}
|
||||
|
||||
proxy->upstream_size = size;
|
||||
|
||||
if ( AF_UNIX != proxy->connect_to.family ) {
|
||||
if ( sock_set_tcp_nodelay( proxy->upstream_fd, 1 ) == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to set TCP_NODELAY" ) );
|
||||
}
|
||||
}
|
||||
|
||||
info( "Connected to upstream on fd %i", proxy->upstream_fd );
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void proxy_disconnect_from_upstream( struct proxier* proxy )
|
||||
{
|
||||
if ( -1 != proxy->upstream_fd ) {
|
||||
info("Closing upstream connection on fd %i", proxy->upstream_fd );
|
||||
|
||||
/* TODO: An NBD disconnect would be pleasant here */
|
||||
WARN_IF_NEGATIVE(
|
||||
sock_try_close( proxy->upstream_fd ),
|
||||
"Failed to close() fd %i when disconnecting from upstream",
|
||||
proxy->upstream_fd
|
||||
);
|
||||
proxy->upstream_fd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Prepares a listening socket for the NBD server, binding etc. */
|
||||
void proxy_open_listen_socket(struct proxier* params)
|
||||
{
|
||||
NULLCHECK( params );
|
||||
|
||||
params->listen_fd = socket(params->listen_on.family, SOCK_STREAM, 0);
|
||||
FATAL_IF_NEGATIVE(
|
||||
params->listen_fd, SHOW_ERRNO( "Couldn't create listen socket" )
|
||||
);
|
||||
|
||||
/* Allow us to restart quickly */
|
||||
FATAL_IF_NEGATIVE(
|
||||
sock_set_reuseaddr(params->listen_fd, 1),
|
||||
SHOW_ERRNO( "Couldn't set SO_REUSEADDR" )
|
||||
);
|
||||
|
||||
if( AF_UNIX != params->listen_on.family ) {
|
||||
FATAL_IF_NEGATIVE(
|
||||
sock_set_tcp_nodelay(params->listen_fd, 1),
|
||||
SHOW_ERRNO( "Couldn't set TCP_NODELAY" )
|
||||
);
|
||||
}
|
||||
|
||||
FATAL_UNLESS_ZERO(
|
||||
sock_try_bind( params->listen_fd, ¶ms->listen_on.generic ),
|
||||
SHOW_ERRNO( "Failed to bind to listening socket" )
|
||||
);
|
||||
|
||||
/* We're only serving one client at a time, hence backlog of 1 */
|
||||
FATAL_IF_NEGATIVE(
|
||||
listen(params->listen_fd, 1),
|
||||
SHOW_ERRNO( "Failed to listen on listening socket" )
|
||||
);
|
||||
|
||||
info( "Now listening for incoming connections" );
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* States of a proxy session.  The numeric values double as indices into
 * proxy_session_state_names below. */
typedef enum {
	EXIT                    = 0,
	WRITE_TO_DOWNSTREAM     = 1,
	READ_FROM_DOWNSTREAM    = 2,
	CONNECT_TO_UPSTREAM     = 3,
	READ_INIT_FROM_UPSTREAM = 4,
	WRITE_TO_UPSTREAM       = 5,
	READ_FROM_UPSTREAM      = 6
} proxy_session_states;

/* Printable name for each state, indexed by state value. */
static char* proxy_session_state_names[] = {
	[EXIT]                    = "EXIT",
	[WRITE_TO_DOWNSTREAM]     = "WRITE_TO_DOWNSTREAM",
	[READ_FROM_DOWNSTREAM]    = "READ_FROM_DOWNSTREAM",
	[CONNECT_TO_UPSTREAM]     = "CONNECT_TO_UPSTREAM",
	[READ_INIT_FROM_UPSTREAM] = "READ_INIT_FROM_UPSTREAM",
	[WRITE_TO_UPSTREAM]       = "WRITE_TO_UPSTREAM",
	[READ_FROM_UPSTREAM]      = "READ_FROM_UPSTREAM"
};

/* Returns 1 if `state` is one of the four upstream-facing states, else 0. */
static inline int proxy_state_upstream(int state)
{
	switch (state) {
	case CONNECT_TO_UPSTREAM:
	case READ_INIT_FROM_UPSTREAM:
	case WRITE_TO_UPSTREAM:
	case READ_FROM_UPSTREAM:
		return 1;
	default:
		return 0;
	}
}
|
||||
|
||||
int proxy_prefetch_for_request( struct proxier* proxy, int state )
|
||||
{
|
||||
NULLCHECK( proxy );
|
||||
struct nbd_request* req = &proxy->req_hdr;
|
||||
struct nbd_reply* rsp = &proxy->rsp_hdr;
|
||||
|
||||
struct nbd_request_raw* req_raw = (struct nbd_request_raw*) proxy->req.buf;
|
||||
struct nbd_reply_raw *rsp_raw = (struct nbd_reply_raw*) proxy->rsp.buf;
|
||||
|
||||
int is_read = ( req->type & REQUEST_MASK ) == REQUEST_READ;
|
||||
|
||||
if ( is_read ) {
|
||||
/* See if we can respond with what's in our prefetch
|
||||
* cache */
|
||||
if ( prefetch_is_full( proxy->prefetch ) &&
|
||||
prefetch_contains( proxy->prefetch, req->from, req->len ) ) {
|
||||
/* HUZZAH! A match! */
|
||||
debug( "Prefetch hit!" );
|
||||
|
||||
/* First build a reply header */
|
||||
rsp->magic = REPLY_MAGIC;
|
||||
rsp->error = 0;
|
||||
memcpy( &rsp->handle, &req->handle, 8 );
|
||||
|
||||
/* now copy it into the response */
|
||||
nbd_h2r_reply( rsp, rsp_raw );
|
||||
|
||||
/* and the data */
|
||||
memcpy(
|
||||
proxy->rsp.buf + NBD_REPLY_SIZE,
|
||||
prefetch_offset( proxy->prefetch, req->from ),
|
||||
req->len
|
||||
);
|
||||
|
||||
proxy->rsp.size = NBD_REPLY_SIZE + req->len;
|
||||
proxy->rsp.needle = 0;
|
||||
|
||||
/* return early, our work here is done */
|
||||
return WRITE_TO_DOWNSTREAM;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Safety catch. If we're sending a write request, we
|
||||
* blow away the cache. This is very pessimistic, but
|
||||
* it's simpler (and therefore safer) than working out
|
||||
* whether we can keep it or not.
|
||||
*/
|
||||
debug( "Blowing away prefetch cache on type %d request.", req->type );
|
||||
prefetch_set_is_empty( proxy->prefetch );
|
||||
}
|
||||
|
||||
debug( "Prefetch cache MISS!");
|
||||
|
||||
uint64_t prefetch_start = req->from;
|
||||
/* We prefetch what we expect to be the next request. */
|
||||
uint64_t prefetch_end = req->from + ( req->len * 2 );
|
||||
|
||||
/* We only want to consider prefetching if we know we're not
|
||||
* getting too much data back, if it's a read request, and if
|
||||
* the prefetch won't try to read past the end of the file.
|
||||
*/
|
||||
int prefetching =
|
||||
req->len <= prefetch_size( proxy->prefetch ) &&
|
||||
is_read &&
|
||||
prefetch_start < prefetch_end &&
|
||||
prefetch_end <= proxy->upstream_size;
|
||||
|
||||
/* We pull the request out of the proxy struct, rewrite the
|
||||
* request size, and write it back.
|
||||
*/
|
||||
if ( prefetching ) {
|
||||
proxy->is_prefetch_req = 1;
|
||||
proxy->prefetch_req_orig_len = req->len;
|
||||
|
||||
req->len *= 2;
|
||||
|
||||
debug( "Prefetching additional %"PRIu32" bytes",
|
||||
req->len - proxy->prefetch_req_orig_len );
|
||||
nbd_h2r_request( req, req_raw );
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
int proxy_prefetch_for_reply( struct proxier* proxy, int state )
|
||||
{
|
||||
size_t prefetched_bytes;
|
||||
|
||||
if ( !proxy->is_prefetch_req ) {
|
||||
return state;
|
||||
}
|
||||
|
||||
prefetched_bytes = proxy->req_hdr.len - proxy->prefetch_req_orig_len;
|
||||
|
||||
debug( "Prefetched additional %d bytes", prefetched_bytes );
|
||||
memcpy(
|
||||
proxy->prefetch->buffer,
|
||||
proxy->rsp.buf + proxy->prefetch_req_orig_len + NBD_REPLY_SIZE,
|
||||
prefetched_bytes
|
||||
);
|
||||
|
||||
proxy->prefetch->from = proxy->req_hdr.from + proxy->prefetch_req_orig_len;
|
||||
proxy->prefetch->len = prefetched_bytes;
|
||||
|
||||
/* We've finished with proxy->req by now, so don't need to alter it to make
|
||||
* it look like the request was before prefetch */
|
||||
|
||||
/* Truncate the bytes we'll write downstream */
|
||||
proxy->req_hdr.len = proxy->prefetch_req_orig_len;
|
||||
proxy->rsp.size -= prefetched_bytes;
|
||||
|
||||
/* And we need to reset these */
|
||||
prefetch_set_is_full( proxy->prefetch );
|
||||
proxy->is_prefetch_req = 0;
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int proxy_read_from_downstream( struct proxier *proxy, int state )
|
||||
{
|
||||
ssize_t count;
|
||||
|
||||
struct nbd_request_raw* request_raw = (struct nbd_request_raw*) proxy->req.buf;
|
||||
struct nbd_request* request = &(proxy->req_hdr);
|
||||
|
||||
// assert( state == READ_FROM_DOWNSTREAM );
|
||||
|
||||
count = iobuf_read( proxy->downstream_fd, &proxy->req, NBD_REQUEST_SIZE );
|
||||
|
||||
if ( count == -1 ) {
|
||||
warn( SHOW_ERRNO( "Couldn't read request from downstream" ) );
|
||||
return EXIT;
|
||||
}
|
||||
|
||||
if ( proxy->req.needle == NBD_REQUEST_SIZE ) {
|
||||
nbd_r2h_request( request_raw, request );
|
||||
|
||||
if ( ( request->type & REQUEST_MASK ) == REQUEST_DISCONNECT ) {
|
||||
info( "Received disconnect request from client" );
|
||||
return EXIT;
|
||||
}
|
||||
|
||||
/* Simple validations */
|
||||
if ( ( request->type & REQUEST_MASK ) == REQUEST_READ ) {
|
||||
if (request->len > ( NBD_MAX_SIZE - NBD_REPLY_SIZE ) ) {
|
||||
warn( "NBD read request size %"PRIu32" too large", request->len );
|
||||
return EXIT;
|
||||
}
|
||||
}
|
||||
if ( (request->type & REQUEST_MASK ) == REQUEST_WRITE ) {
|
||||
if (request->len > ( NBD_MAX_SIZE - NBD_REQUEST_SIZE ) ) {
|
||||
warn( "NBD write request size %"PRIu32" too large", request->len );
|
||||
return EXIT;
|
||||
}
|
||||
|
||||
proxy->req.size += request->len;
|
||||
}
|
||||
}
|
||||
|
||||
if ( proxy->req.needle == proxy->req.size ) {
|
||||
debug(
|
||||
"Received NBD request from downstream. type=%"PRIu32" from=%"PRIu64" len=%"PRIu32,
|
||||
request->type, request->from, request->len
|
||||
);
|
||||
|
||||
/* Finished reading, so advance state. Leave size untouched so the next
|
||||
* state knows how many bytes to write */
|
||||
proxy->req.needle = 0;
|
||||
return WRITE_TO_UPSTREAM;
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
int proxy_continue_connecting_to_upstream( struct proxier* proxy, int state )
|
||||
{
|
||||
int error, result;
|
||||
socklen_t len = sizeof( error );
|
||||
|
||||
// assert( state == CONNECT_TO_UPSTREAM );
|
||||
|
||||
result = getsockopt(
|
||||
proxy->upstream_fd, SOL_SOCKET, SO_ERROR, &error, &len
|
||||
);
|
||||
|
||||
if ( result == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to tell if connected to upstream" ) );
|
||||
return state;
|
||||
}
|
||||
|
||||
if ( error != 0 ) {
|
||||
errno = error;
|
||||
warn( SHOW_ERRNO( "Failed to connect to upstream" ) );
|
||||
return state;
|
||||
}
|
||||
|
||||
/* Data may have changed while we were disconnected */
|
||||
prefetch_set_is_empty( proxy->prefetch );
|
||||
|
||||
info( "Connected to upstream on fd %i", proxy->upstream_fd );
|
||||
return READ_INIT_FROM_UPSTREAM;
|
||||
}
|
||||
|
||||
int proxy_read_init_from_upstream( struct proxier* proxy, int state )
|
||||
{
|
||||
ssize_t count;
|
||||
|
||||
// assert( state == READ_INIT_FROM_UPSTREAM );
|
||||
|
||||
count = iobuf_read( proxy->upstream_fd, &proxy->init, sizeof( struct nbd_init_raw ) );
|
||||
|
||||
if ( count == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to read init from upstream" ) );
|
||||
goto disconnect;
|
||||
}
|
||||
|
||||
if ( proxy->init.needle == proxy->init.size ) {
|
||||
uint64_t upstream_size;
|
||||
if ( !nbd_check_hello( (struct nbd_init_raw*) proxy->init.buf, &upstream_size ) ) {
|
||||
warn( "Upstream sent invalid init" );
|
||||
goto disconnect;
|
||||
}
|
||||
|
||||
/* Currently, we only get disconnected from upstream (so needing to come
|
||||
* here) when we have an outstanding request. If that becomes false,
|
||||
* we'll need to choose the right state to return to here */
|
||||
proxy->init.needle = 0;
|
||||
return WRITE_TO_UPSTREAM;
|
||||
}
|
||||
|
||||
return state;
|
||||
|
||||
disconnect:
|
||||
proxy->init.needle = 0;
|
||||
proxy->init.size = 0;
|
||||
return CONNECT_TO_UPSTREAM;
|
||||
}
|
||||
|
||||
int proxy_write_to_upstream( struct proxier* proxy, int state )
|
||||
{
|
||||
ssize_t count;
|
||||
|
||||
// assert( state == WRITE_TO_UPSTREAM );
|
||||
|
||||
/* FIXME: We may set cork=1 multiple times as a result of this idiom.
|
||||
* Not a serious problem, but we could do better
|
||||
*/
|
||||
if ( proxy->req.needle == 0 && AF_UNIX != proxy->connect_to.family ) {
|
||||
if ( sock_set_tcp_cork( proxy->upstream_fd, 1 ) == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to set TCP_CORK" ) );
|
||||
}
|
||||
}
|
||||
|
||||
count = iobuf_write( proxy->upstream_fd, &proxy->req );
|
||||
|
||||
if ( count == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to send request to upstream" ) );
|
||||
proxy->req.needle = 0;
|
||||
// We're throwing the socket away so no need to uncork
|
||||
return CONNECT_TO_UPSTREAM;
|
||||
}
|
||||
|
||||
if ( proxy->req.needle == proxy->req.size ) {
|
||||
/* Request sent. Advance to reading the response from upstream. We might
|
||||
* still need req.size if reading the reply fails - we disconnect
|
||||
* and resend the reply in that case - so keep it around for now. */
|
||||
proxy->req.needle = 0;
|
||||
|
||||
if ( AF_UNIX != proxy->connect_to.family ) {
|
||||
if ( sock_set_tcp_cork( proxy->upstream_fd, 0 ) == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to unset TCP_CORK" ) );
|
||||
// TODO: should we return to CONNECT_TO_UPSTREAM in this instance?
|
||||
}
|
||||
}
|
||||
|
||||
return READ_FROM_UPSTREAM;
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
int proxy_read_from_upstream( struct proxier* proxy, int state )
|
||||
{
|
||||
ssize_t count;
|
||||
|
||||
struct nbd_reply* reply = &(proxy->rsp_hdr);
|
||||
struct nbd_reply_raw* reply_raw = (struct nbd_reply_raw*) proxy->rsp.buf;
|
||||
|
||||
/* We can't assume the NBD_REPLY_SIZE + req->len is what we'll get back */
|
||||
count = iobuf_read( proxy->upstream_fd, &proxy->rsp, NBD_REPLY_SIZE );
|
||||
|
||||
if ( count == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to get reply from upstream" ) );
|
||||
goto disconnect;
|
||||
}
|
||||
|
||||
if ( proxy->rsp.needle == NBD_REPLY_SIZE ) {
|
||||
nbd_r2h_reply( reply_raw, reply );
|
||||
|
||||
if ( reply->magic != REPLY_MAGIC ) {
|
||||
warn( "Reply magic is incorrect" );
|
||||
goto disconnect;
|
||||
}
|
||||
|
||||
if ( reply->error != 0 ) {
|
||||
warn( "NBD error returned from upstream: %"PRIu32, reply->error );
|
||||
goto disconnect;
|
||||
}
|
||||
|
||||
if ( ( proxy->req_hdr.type & REQUEST_MASK ) == REQUEST_READ ) {
|
||||
/* Get the read reply data too. */
|
||||
proxy->rsp.size += proxy->req_hdr.len;
|
||||
}
|
||||
}
|
||||
|
||||
if ( proxy->rsp.size == proxy->rsp.needle ) {
|
||||
debug( "NBD reply received from upstream." );
|
||||
proxy->rsp.needle = 0;
|
||||
return WRITE_TO_DOWNSTREAM;
|
||||
}
|
||||
|
||||
return state;
|
||||
|
||||
disconnect:
|
||||
proxy->rsp.needle = 0;
|
||||
proxy->rsp.size = 0;
|
||||
return CONNECT_TO_UPSTREAM;
|
||||
}
|
||||
|
||||
|
||||
int proxy_write_to_downstream( struct proxier* proxy, int state )
|
||||
{
|
||||
ssize_t count;
|
||||
|
||||
// assert( state == WRITE_TO_DOWNSTREAM );
|
||||
|
||||
if ( !proxy->hello_sent ) {
|
||||
info( "Writing init to downstream" );
|
||||
}
|
||||
|
||||
count = iobuf_write( proxy->downstream_fd, &proxy->rsp );
|
||||
|
||||
if ( count == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to write to downstream" ) );
|
||||
return EXIT;
|
||||
}
|
||||
|
||||
if ( proxy->rsp.needle == proxy->rsp.size ) {
|
||||
if ( !proxy->hello_sent ) {
|
||||
info( "Hello message sent to client" );
|
||||
proxy->hello_sent = 1;
|
||||
} else {
|
||||
debug( "Reply sent" );
|
||||
proxy->req_count++;
|
||||
}
|
||||
|
||||
/* We're done with the request & response buffers now */
|
||||
proxy->req.size = 0;
|
||||
proxy->req.needle = 0;
|
||||
proxy->rsp.size = 0;
|
||||
proxy->rsp.needle = 0;
|
||||
return READ_FROM_DOWNSTREAM;
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
/* Non-blocking proxy session. Simple(ish) state machine. We read from d/s until
|
||||
* we have a full request, then try to write that request u/s. If writing fails,
|
||||
* we reconnect to upstream and retry. Once we've successfully written, we
|
||||
* attempt to read the reply. If that fails or times out (we give it 30 seconds)
|
||||
* then we disconnect from u/s and go back to trying to reconnect and resend.
|
||||
*
|
||||
* This is the second-simplest NBD proxy I can think of. The first version was
|
||||
* non-blocking I/O, but it was getting impossible to manage exceptional stuff
|
||||
*/
|
||||
void proxy_session( struct proxier* proxy )
|
||||
{
|
||||
uint64_t state_started = monotonic_time_ms();
|
||||
int old_state = EXIT;
|
||||
int state;
|
||||
int connect_to_upstream_cooldown = 0;
|
||||
|
||||
|
||||
/* First action: Write hello to downstream */
|
||||
nbd_hello_to_buf( (struct nbd_init_raw *) proxy->rsp.buf, proxy->upstream_size );
|
||||
proxy->rsp.size = sizeof( struct nbd_init_raw );
|
||||
proxy->rsp.needle = 0;
|
||||
state = WRITE_TO_DOWNSTREAM;
|
||||
|
||||
|
||||
info( "Beginning proxy session on fd %i", proxy->downstream_fd );
|
||||
|
||||
while( state != EXIT ) {
|
||||
|
||||
struct timeval select_timeout = {
|
||||
.tv_sec = 0,
|
||||
.tv_usec = 0
|
||||
};
|
||||
|
||||
struct timeval *select_timeout_ptr = NULL;
|
||||
|
||||
int result; /* used by select() */
|
||||
|
||||
fd_set rfds;
|
||||
fd_set wfds;
|
||||
|
||||
FD_ZERO( &rfds );
|
||||
FD_ZERO( &wfds );
|
||||
|
||||
if ( state != old_state ) {
|
||||
state_started = monotonic_time_ms();
|
||||
|
||||
debug(
|
||||
"State transition from %s to %s",
|
||||
proxy_session_state_names[old_state],
|
||||
proxy_session_state_names[state]
|
||||
);
|
||||
} else {
|
||||
debug( "Proxy is in state %s", proxy_session_state_names[state], state );
|
||||
}
|
||||
|
||||
old_state = state;
|
||||
|
||||
switch( state ) {
|
||||
case READ_FROM_DOWNSTREAM:
|
||||
FD_SET( proxy->downstream_fd, &rfds );
|
||||
break;
|
||||
case WRITE_TO_DOWNSTREAM:
|
||||
FD_SET( proxy->downstream_fd, &wfds );
|
||||
break;
|
||||
case WRITE_TO_UPSTREAM:
|
||||
select_timeout.tv_sec = 15;
|
||||
FD_SET(proxy->upstream_fd, &wfds );
|
||||
break;
|
||||
case CONNECT_TO_UPSTREAM:
|
||||
/* upstream_fd is now -1 */
|
||||
proxy_disconnect_from_upstream( proxy );
|
||||
|
||||
if ( connect_to_upstream_cooldown ) {
|
||||
connect_to_upstream_cooldown = 0;
|
||||
select_timeout.tv_sec = 3;
|
||||
} else {
|
||||
proxy_start_connect_to_upstream( proxy );
|
||||
|
||||
if ( proxy->upstream_fd == -1 ) {
|
||||
warn( SHOW_ERRNO( "Error acquiring socket to upstream" ) );
|
||||
continue;
|
||||
}
|
||||
FD_SET( proxy->upstream_fd, &wfds );
|
||||
select_timeout.tv_sec = 15;
|
||||
}
|
||||
break;
|
||||
case READ_INIT_FROM_UPSTREAM:
|
||||
case READ_FROM_UPSTREAM:
|
||||
select_timeout.tv_sec = 15;
|
||||
FD_SET( proxy->upstream_fd, &rfds );
|
||||
break;
|
||||
};
|
||||
|
||||
if ( select_timeout.tv_sec > 0 ) {
|
||||
select_timeout_ptr = &select_timeout;
|
||||
}
|
||||
|
||||
result = sock_try_select( FD_SETSIZE, &rfds, &wfds, NULL, select_timeout_ptr );
|
||||
|
||||
if ( result == -1 ) {
|
||||
warn( SHOW_ERRNO( "select() failed: " ) );
|
||||
break;
|
||||
}
|
||||
|
||||
/* Happens after failed reconnect. Avoid SIGBUS on FD_ISSET() */
|
||||
if ( proxy->upstream_fd == -1 ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
switch( state ) {
|
||||
case READ_FROM_DOWNSTREAM:
|
||||
if ( FD_ISSET( proxy->downstream_fd, &rfds ) ) {
|
||||
state = proxy_read_from_downstream( proxy, state );
|
||||
/* Check if we can fulfil the request from prefetch, or
|
||||
* rewrite the request to fill the prefetch buffer if needed
|
||||
*/
|
||||
if ( proxy_prefetches( proxy ) && state == WRITE_TO_UPSTREAM ) {
|
||||
state = proxy_prefetch_for_request( proxy, state );
|
||||
}
|
||||
}
|
||||
break;
|
||||
case CONNECT_TO_UPSTREAM:
|
||||
if ( FD_ISSET( proxy->upstream_fd, &wfds ) ) {
|
||||
state = proxy_continue_connecting_to_upstream( proxy, state );
|
||||
}
|
||||
/* Leaving state untouched will retry connecting to upstream -
|
||||
* so introduce a bit of sleep */
|
||||
if ( state == CONNECT_TO_UPSTREAM ) {
|
||||
connect_to_upstream_cooldown = 1;
|
||||
}
|
||||
|
||||
break;
|
||||
case READ_INIT_FROM_UPSTREAM:
|
||||
state = proxy_read_init_from_upstream( proxy, state );
|
||||
|
||||
if ( state == CONNECT_TO_UPSTREAM ) {
|
||||
connect_to_upstream_cooldown = 1;
|
||||
}
|
||||
|
||||
break;
|
||||
case WRITE_TO_UPSTREAM:
|
||||
if ( FD_ISSET( proxy->upstream_fd, &wfds ) ) {
|
||||
state = proxy_write_to_upstream( proxy, state );
|
||||
}
|
||||
break;
|
||||
case READ_FROM_UPSTREAM:
|
||||
if ( FD_ISSET( proxy->upstream_fd, &rfds ) ) {
|
||||
state = proxy_read_from_upstream( proxy, state );
|
||||
}
|
||||
/* Fill the prefetch buffer and rewrite the reply, if needed */
|
||||
if ( proxy_prefetches( proxy ) && state == WRITE_TO_DOWNSTREAM ) {
|
||||
state = proxy_prefetch_for_reply( proxy, state );
|
||||
}
|
||||
break;
|
||||
case WRITE_TO_DOWNSTREAM:
|
||||
if ( FD_ISSET( proxy->downstream_fd, &wfds ) ) {
|
||||
state = proxy_write_to_downstream( proxy, state );
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* In these states, we're interested in restarting after a timeout.
|
||||
*/
|
||||
if ( old_state == state && proxy_state_upstream( state ) ) {
|
||||
if ( ( monotonic_time_ms() ) - state_started > UPSTREAM_TIMEOUT ) {
|
||||
warn(
|
||||
"Timed out in state %s while communicating with upstream",
|
||||
proxy_session_state_names[state]
|
||||
);
|
||||
state = CONNECT_TO_UPSTREAM;
|
||||
|
||||
/* Since we've timed out, we won't have gone through the timeout logic
|
||||
* in the various state handlers that resets these appropriately... */
|
||||
proxy->init.size = 0;
|
||||
proxy->init.needle = 0;
|
||||
proxy->rsp.size = 0;
|
||||
proxy->rsp.needle = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info(
|
||||
"Finished proxy session on fd %i after %"PRIu64" successful request(s)",
|
||||
proxy->downstream_fd, proxy->req_count
|
||||
);
|
||||
|
||||
/* Reset these two for the next session */
|
||||
proxy->req_count = 0;
|
||||
proxy->hello_sent = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/** Accept an NBD socket connection, dispatch appropriately */
|
||||
int proxy_accept( struct proxier* params )
|
||||
{
|
||||
NULLCHECK( params );
|
||||
|
||||
int client_fd;
|
||||
fd_set fds;
|
||||
|
||||
union mysockaddr client_address;
|
||||
socklen_t socklen = sizeof( client_address );
|
||||
|
||||
info( "Waiting for client connection" );
|
||||
|
||||
FD_ZERO(&fds);
|
||||
FD_SET(params->listen_fd, &fds);
|
||||
|
||||
FATAL_IF_NEGATIVE(
|
||||
sock_try_select(FD_SETSIZE, &fds, NULL, NULL, NULL),
|
||||
SHOW_ERRNO( "select() failed" )
|
||||
);
|
||||
|
||||
if ( FD_ISSET( params->listen_fd, &fds ) ) {
|
||||
client_fd = accept( params->listen_fd, &client_address.generic, &socklen );
|
||||
|
||||
if ( client_address.family != AF_UNIX ) {
|
||||
if ( sock_set_tcp_nodelay(client_fd, 1) == -1 ) {
|
||||
warn( SHOW_ERRNO( "Failed to set TCP_NODELAY" ) );
|
||||
}
|
||||
}
|
||||
|
||||
info( "Accepted nbd client socket fd %d", client_fd );
|
||||
sock_set_nonblock( client_fd, 1 );
|
||||
params->downstream_fd = client_fd;
|
||||
proxy_session( params );
|
||||
|
||||
WARN_IF_NEGATIVE(
|
||||
sock_try_close( params->downstream_fd ),
|
||||
"Couldn't close() downstram fd %i after proxy session",
|
||||
params->downstream_fd
|
||||
);
|
||||
params->downstream_fd = -1;
|
||||
}
|
||||
|
||||
return 1; /* We actually expect to be interrupted by signal handlers */
|
||||
}
|
||||
|
||||
|
||||
/**
 * Serve downstream clients one after another: call proxy_accept() repeatedly
 * for as long as it returns non-zero.
 */
void proxy_accept_loop( struct proxier* params )
{
	NULLCHECK( params );

	for ( ;; ) {
		if ( !proxy_accept( params ) ) {
			break;
		}
	}
}
|
||||
|
||||
/** Closes sockets */
|
||||
void proxy_cleanup( struct proxier* proxy )
|
||||
{
|
||||
NULLCHECK( proxy );
|
||||
|
||||
info( "Cleaning up" );
|
||||
|
||||
if ( -1 != proxy->listen_fd ) {
|
||||
|
||||
if ( AF_UNIX == proxy->listen_on.family ) {
|
||||
if ( -1 == unlink( proxy->listen_on.un.sun_path ) ) {
|
||||
warn( SHOW_ERRNO( "Failed to unlink %s", proxy->listen_on.un.sun_path ) );
|
||||
}
|
||||
}
|
||||
|
||||
WARN_IF_NEGATIVE(
|
||||
sock_try_close( proxy->listen_fd ),
|
||||
SHOW_ERRNO( "Failed to close() listen fd %i", proxy->listen_fd )
|
||||
);
|
||||
proxy->listen_fd = -1;
|
||||
}
|
||||
|
||||
if ( -1 != proxy->downstream_fd ) {
|
||||
WARN_IF_NEGATIVE(
|
||||
sock_try_close( proxy->downstream_fd ),
|
||||
SHOW_ERRNO(
|
||||
"Failed to close() downstream fd %i", proxy->downstream_fd
|
||||
)
|
||||
);
|
||||
proxy->downstream_fd = -1;
|
||||
}
|
||||
|
||||
if ( -1 != proxy->upstream_fd ) {
|
||||
WARN_IF_NEGATIVE(
|
||||
sock_try_close( proxy->upstream_fd ),
|
||||
SHOW_ERRNO(
|
||||
"Failed to close() upstream fd %i", proxy->upstream_fd
|
||||
)
|
||||
);
|
||||
proxy->upstream_fd = -1;
|
||||
}
|
||||
|
||||
info( "Cleanup done" );
|
||||
}
|
||||
|
||||
/**
 * Full lifecycle of the proxier: connect to upstream, open the listening
 * socket, serve clients, then clean up.
 *
 * @return 1 if the initial upstream connection could not be made (nothing is
 *         served in that case), 0 after the accept loop has finished.
 */
int do_proxy( struct proxier* params )
{
	int exit_code = 1;

	NULLCHECK( params );

	info( "Ensuring upstream server is open" );

	if ( proxy_connect_to_upstream( params ) ) {
		proxy_open_listen_socket( params );
		proxy_accept_loop( params );
		exit_code = 0;
	} else {
		warn( "Couldn't connect to upstream server during initialization, exiting" );
	}

	/* Sockets are released on both the success and the failure path */
	proxy_cleanup( params );

	return exit_code;
}
|
||||
|
97
src/proxy/proxy.h
Normal file
97
src/proxy/proxy.h
Normal file
@@ -0,0 +1,97 @@
|
||||
#ifndef PROXY_H
|
||||
#define PROXY_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "ioutil.h"
|
||||
#include "parse.h"
|
||||
#include "nbdtypes.h"
|
||||
#include "self_pipe.h"
|
||||
|
||||
#ifdef PREFETCH
|
||||
#include "prefetch.h"
|
||||
#endif
|
||||
|
||||
/** UPSTREAM_TIMEOUT
 * How long ( in ms ) to allow for upstream to respond. If it takes longer
 * than this, we will cancel the current request-response to them and resubmit
 *
 * The expansion is parenthesised so the macro behaves as a single value
 * inside any larger expression ( e.g. x / UPSTREAM_TIMEOUT ).
 */
#define UPSTREAM_TIMEOUT ( 30 * 1000 )
|
||||
|
||||
struct proxier {
|
||||
/** address/port to bind to */
|
||||
union mysockaddr listen_on;
|
||||
|
||||
/** address/port to connect to */
|
||||
union mysockaddr connect_to;
|
||||
|
||||
/** address to bind to when making outgoing connections */
|
||||
union mysockaddr connect_from;
|
||||
int bind; /* Set to true if we should use it */
|
||||
|
||||
/* The socket we listen() on and accept() against */
|
||||
int listen_fd;
|
||||
|
||||
/* The socket returned by accept() that we receive requests from and send
|
||||
* responses to
|
||||
*/
|
||||
int downstream_fd;
|
||||
|
||||
/* The socket returned by connect() that we send requests to and receive
|
||||
* responses from
|
||||
*/
|
||||
int upstream_fd;
|
||||
|
||||
/* This is the size we advertise to the downstream server */
|
||||
uint64_t upstream_size;
|
||||
|
||||
/* We transform the raw request header into here */
|
||||
struct nbd_request req_hdr;
|
||||
|
||||
/* We transform the raw reply header into here */
|
||||
struct nbd_reply rsp_hdr;
|
||||
|
||||
/* Used for our non-blocking negotiation with upstream. TODO: maybe use
|
||||
* for downstream as well ( we currently overload rsp ) */
|
||||
struct iobuf init;
|
||||
|
||||
/* The current NBD request from downstream */
|
||||
struct iobuf req;
|
||||
|
||||
/* The current NBD reply from upstream */
|
||||
struct iobuf rsp;
|
||||
|
||||
/* It's starting to feel like we need an object for a single proxy session.
|
||||
* These two track how many requests we've sent so far, and whether the
|
||||
* NBD_INIT code has been sent to the client yet.
|
||||
*/
|
||||
uint64_t req_count;
|
||||
int hello_sent;
|
||||
|
||||
/** These are only used if we pass --cache on the command line */
|
||||
|
||||
/* While the in-flight request has been munged by prefetch, these two are
|
||||
* set to true, and the original length of the request, respectively */
|
||||
int is_prefetch_req;
|
||||
uint32_t prefetch_req_orig_len;
|
||||
|
||||
/* And here, we actually store the prefetched data once it's returned */
|
||||
struct prefetch *prefetch;
|
||||
|
||||
/** */
|
||||
};
|
||||
|
||||
struct proxier* proxy_create(
|
||||
char* s_downstream_address,
|
||||
char* s_downstream_port,
|
||||
char* s_upstream_address,
|
||||
char* s_upstream_port,
|
||||
char* s_upstream_bind,
|
||||
char* s_cache_bytes);
|
||||
int do_proxy( struct proxier* proxy );
|
||||
void proxy_cleanup( struct proxier* proxy );
|
||||
void proxy_destroy( struct proxier* proxy );
|
||||
|
||||
#endif
|
||||
|
@@ -1,16 +0,0 @@
|
||||
#ifndef READWRITE_H
|
||||
|
||||
#define READWRITE_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
|
||||
int socket_connect(struct sockaddr* to, struct sockaddr* from);
|
||||
int socket_nbd_read_hello(int fd, off64_t * size);
|
||||
void socket_nbd_read(int fd, off64_t from, int len, int out_fd, void* out_buf, int timeout_secs);
|
||||
void socket_nbd_write(int fd, off64_t from, int len, int out_fd, void* out_buf, int timeout_secs);
|
||||
void socket_nbd_entrust(int fd);
|
||||
int socket_nbd_disconnect( int fd );
|
||||
|
||||
#endif
|
||||
|
114
src/serve.h
114
src/serve.h
@@ -1,114 +0,0 @@
|
||||
#ifndef SERVE_H
|
||||
#define SERVE_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "flexnbd.h"
|
||||
#include "parse.h"
|
||||
#include "acl.h"
|
||||
|
||||
|
||||
/* Chunk size, in bytes, used for the block allocation map.
 * An earlier value, 128<<10, is kept here for reference. */
static const int block_allocation_resolution = 4096;
|
||||
|
||||
|
||||
struct client_tbl_entry {
|
||||
pthread_t thread;
|
||||
union mysockaddr address;
|
||||
struct client * client;
|
||||
};
|
||||
|
||||
|
||||
#define MAX_NBD_CLIENTS 16
|
||||
struct server {
|
||||
/* The flexnbd wrapper this server is attached to */
|
||||
struct flexnbd * flexnbd;
|
||||
|
||||
/** address/port to bind to */
|
||||
union mysockaddr bind_to;
|
||||
/** (static) file name to serve */
|
||||
char* filename;
|
||||
/** file name of INCOMPLETE flag */
|
||||
char* filename_incomplete;
|
||||
/** TCP backlog for listen() */
|
||||
int tcp_backlog;
|
||||
/** (static) file name of UNIX control socket (or NULL if none) */
|
||||
char* control_socket_name;
|
||||
/** size of file */
|
||||
uint64_t size;
|
||||
|
||||
/** Claims around any I/O to this file */
|
||||
struct flexthread_mutex * l_io;
|
||||
|
||||
/** to interrupt accept loop and clients, write() to close_signal[1] */
|
||||
struct self_pipe * close_signal;
|
||||
|
||||
/** access control list */
|
||||
struct acl * acl;
|
||||
/** acl_updated_signal will be signalled after the acl struct
|
||||
* has been replaced
|
||||
*/
|
||||
struct self_pipe * acl_updated_signal;
|
||||
|
||||
/* Claimed around any updates to the ACL. */
|
||||
struct flexthread_mutex * l_acl;
|
||||
|
||||
struct mirror* mirror;
|
||||
struct mirror_super * mirror_super;
|
||||
int server_fd;
|
||||
int control_fd;
|
||||
|
||||
struct bitset_mapping* allocation_map;
|
||||
|
||||
int max_nbd_clients;
|
||||
struct client_tbl_entry *nbd_client;
|
||||
|
||||
|
||||
/* Marker for whether this server has control over the data in
|
||||
* the file, or if we're waiting to receive it from an inbound
|
||||
* migration which hasn't yet finished.
|
||||
*/
|
||||
int has_control;
|
||||
};
|
||||
|
||||
struct server * server_create(
|
||||
struct flexnbd * flexnbd,
|
||||
char* s_ip_address,
|
||||
char* s_port,
|
||||
char* s_file,
|
||||
int default_deny,
|
||||
int acl_entries,
|
||||
char** s_acl_entries,
|
||||
int max_nbd_clients,
|
||||
int has_control );
|
||||
void server_destroy( struct server * );
|
||||
int server_is_closed(struct server* serve);
|
||||
void server_dirty(struct server *serve, off64_t from, int len);
|
||||
void server_lock_io( struct server * serve);
|
||||
void server_unlock_io( struct server* serve );
|
||||
void serve_signal_close( struct server *serve );
|
||||
void serve_wait_for_close( struct server * serve );
|
||||
void server_replace_acl( struct server *serve, struct acl * acl);
|
||||
void server_control_arrived( struct server *serve );
|
||||
int server_is_in_control( struct server *serve );
|
||||
int server_default_deny( struct server * serve );
|
||||
int server_io_locked( struct server * serve );
|
||||
int server_acl_locked( struct server * serve );
|
||||
void server_lock_acl( struct server *serve );
|
||||
void server_unlock_acl( struct server *serve );
|
||||
|
||||
|
||||
int do_serve( struct server * );
|
||||
|
||||
struct mode_readwrite_params {
|
||||
union mysockaddr connect_to;
|
||||
union mysockaddr connect_from;
|
||||
off64_t from;
|
||||
off64_t len;
|
||||
int data_fd;
|
||||
int client;
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -31,7 +31,7 @@ static int is_included_in_acl(int list_length, struct ip_and_mask (*list)[], uni
|
||||
for (i=0; i < list_length; i++) {
|
||||
struct ip_and_mask *entry = &(*list)[i];
|
||||
int testbits;
|
||||
unsigned char *raw_address1, *raw_address2;
|
||||
unsigned char *raw_address1 = NULL, *raw_address2 = NULL;
|
||||
|
||||
debug("checking acl entry %d (%d/%d)", i, test->generic.sa_family, entry->ip.family);
|
||||
|
438
src/server/bitset.h
Normal file
438
src/server/bitset.h
Normal file
@@ -0,0 +1,438 @@
|
||||
#ifndef BITSET_H
|
||||
#define BITSET_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
|
||||
/*
 * Make the bitfield words 'opaque' to prevent code
 * poking at the bits directly without using these
 * accessors/macros
 */
typedef uint64_t bitfield_word_t;
typedef bitfield_word_t * bitfield_p;

#define BITFIELD_WORD_SIZE sizeof(bitfield_word_t)
#define BITS_PER_WORD (BITFIELD_WORD_SIZE * 8)

/* Mask selecting bit _idx within its word. The shifted 1 has the (unsigned)
 * word type, so selecting the top bit of a word is well defined. */
#define BIT_MASK(_idx) \
	((bitfield_word_t)1 << ((_idx) & (BITS_PER_WORD - 1)))
/* The word (as an lvalue) of array _b that holds bit _idx */
#define BIT_WORD(_b, _idx) \
	((bitfield_word_t*)(_b))[(_idx) / BITS_PER_WORD]

/* Calculates the number of words needed to store _bytes number of bytes
 * this is added to accommodate code that wants to use bytes sizes
 */
#define BIT_WORDS_FOR_SIZE(_bytes) \
	(((_bytes) + (BITFIELD_WORD_SIZE-1)) / BITFIELD_WORD_SIZE)

/** Return the bit value ''idx'' in array ''b'' (0 or 1) */
static inline int bit_get(bitfield_p b, uint64_t idx) {
	return (BIT_WORD(b, idx) >> (idx & (BITS_PER_WORD-1))) & 1;
}

/** Return 1 if the bit at ''idx'' in array ''b'' is set */
static inline int bit_is_set(bitfield_p b, uint64_t idx) {
	return bit_get(b, idx);
}
/** Return 1 if the bit at ''idx'' in array ''b'' is clear */
static inline int bit_is_clear(bitfield_p b, uint64_t idx) {
	return !bit_get(b, idx);
}
/** Tests whether the bit at ''idx'' in array ''b'' has value ''value''.
 * Any non-zero ''value'' counts as 1. */
static inline int bit_has_value(bitfield_p b, uint64_t idx, int value) {
	return bit_get(b, idx) == !!value;
}
/** Sets the bit ''idx'' in array ''b'' */
static inline void bit_set(bitfield_p b, uint64_t idx) {
	BIT_WORD(b, idx) |= BIT_MASK(idx);
}
/** Clears the bit ''idx'' in array ''b'' */
static inline void bit_clear(bitfield_p b, uint64_t idx) {
	BIT_WORD(b, idx) &= ~BIT_MASK(idx);
}
/** Sets ''len'' bits in array ''b'' starting at offset ''from'' */
static inline void bit_set_range(bitfield_p b, uint64_t from, uint64_t len)
{
	/* Head: bit by bit up to the next word boundary */
	for ( ; (from % BITS_PER_WORD) != 0 && len > 0 ; len-- ) {
		bit_set( b, from++ );
	}

	/* Middle: whole words only. Filling a partial trailing word byte-wise
	 * would hit the wrong bits on a big-endian host, so the remainder is
	 * left to the bit-by-bit tail below. */
	if (len >= BITS_PER_WORD) {
		uint64_t words = len / BITS_PER_WORD;

		memset(&BIT_WORD(b, from), 0xff, words * BITFIELD_WORD_SIZE );
		from += words * BITS_PER_WORD;
		len  -= words * BITS_PER_WORD;
	}

	/* Tail: fewer than BITS_PER_WORD bits remain */
	for ( ; len > 0 ; len-- ) {
		bit_set( b, from++ );
	}
}
/** Clears ''len'' bits in array ''b'' starting at offset ''from'' */
static inline void bit_clear_range(bitfield_p b, uint64_t from, uint64_t len)
{
	/* Head: bit by bit up to the next word boundary */
	for ( ; (from % BITS_PER_WORD) != 0 && len > 0 ; len-- ) {
		bit_clear( b, from++ );
	}

	/* Middle: whole words only (see bit_set_range) */
	if (len >= BITS_PER_WORD) {
		uint64_t words = len / BITS_PER_WORD;

		memset(&BIT_WORD(b, from), 0, words * BITFIELD_WORD_SIZE );
		from += words * BITS_PER_WORD;
		len  -= words * BITS_PER_WORD;
	}

	/* Tail: fewer than BITS_PER_WORD bits remain */
	for ( ; len > 0 ; len-- ) {
		bit_clear( b, from++ );
	}
}

/** Counts the number of contiguous bits in array ''b'', starting at ''from''
 * up to a maximum number of bits ''len''. Returns the number of contiguous
 * bits that are the same as the first one specified. If ''run_is_set'' is
 * non-NULL, the value of that bit is placed into it.
 *
 * Note that the bit at ''from'' is read even when ''len'' is 0.
 */
static inline uint64_t bit_run_count(bitfield_p b, uint64_t from, uint64_t len, int *run_is_set) {
	uint64_t count = 0;
	int first_value = bit_get(b, from);
	/* A whole word of the run's value: all ones or all zeroes */
	bitfield_word_t word_match = first_value ? -1 : 0;

	if ( run_is_set != NULL ) {
		*run_is_set = first_value;
	}

	/* Head: bit by bit up to the next word boundary */
	for ( ; ((from + count) % BITS_PER_WORD) != 0 && len > 0; len--) {
		if (bit_has_value(b, from + count, first_value)) {
			count++;
		} else {
			return count;
		}
	}

	/* Middle: a word at a time while whole words match */
	for ( ; len >= BITS_PER_WORD ; len -= BITS_PER_WORD ) {
		if (BIT_WORD(b, from + count) == word_match) {
			count += BITS_PER_WORD;
		} else {
			break;
		}
	}

	/* Tail: bit by bit; the run is over at the first bit that differs, so
	 * stop there instead of re-testing that same bit for the rest of len */
	for ( ; len > 0; len-- ) {
		if ( bit_has_value(b, from + count, first_value) ) {
			count++;
		} else {
			break;
		}
	}

	return count;
}
|
||||
|
||||
/** Kinds of event that can be queued on a bitset's stream. The values are
 * also used as indexes into bitset_stream.queued_bytes[].
 */
enum bitset_stream_events {
	BITSET_STREAM_UNSET = 0,
	BITSET_STREAM_SET = 1,
	BITSET_STREAM_ON = 2,
	BITSET_STREAM_OFF = 3
};
/* Number of values in enum bitset_stream_events */
#define BITSET_STREAM_EVENTS_ENUM_SIZE 4

/** One queued event, together with the range ( from, len ) it covers */
struct bitset_stream_entry {
	enum bitset_stream_events event;
	uint64_t from;
	uint64_t len;
};

/** Limit the stream size to 1MB for now.
 *
 * If this is too small, it'll cause requests to stall as the migration lags
 * behind the changes made by those requests.
 *
 * The value is the number of entries that fit into that 1MB.
 */
#define BITSET_STREAM_SIZE ( ( 1024 * 1024 ) / sizeof( struct bitset_stream_entry ) )

/** Fixed-size circular queue of events, guarded by ''mutex''.
 *
 * ''in'' is the index the next entry is written at, ''out'' the index the
 * next entry is read from and ''size'' the number of entries queued.
 * ''queued_bytes'' keeps, per event type, the sum of ''len'' over the
 * entries currently queued.
 */
struct bitset_stream {
	struct bitset_stream_entry entries[BITSET_STREAM_SIZE];
	int in;
	int out;
	int size;
	pthread_mutex_t mutex;
	pthread_cond_t cond_not_full;
	pthread_cond_t cond_not_empty;
	uint64_t queued_bytes[BITSET_STREAM_EVENTS_ENUM_SIZE];
};
|
||||
|
||||
|
||||
/** An application of a bitset - a bitset mapping represents a file of ''size''
|
||||
* broken down into ''resolution''-sized chunks. The bit set is assumed to
|
||||
* represent one bit per chunk. We also bundle a lock so that the set can be
|
||||
* written reliably by multiple threads.
|
||||
*/
|
||||
struct bitset {
|
||||
pthread_mutex_t lock;
|
||||
uint64_t size;
|
||||
int resolution;
|
||||
struct bitset_stream *stream;
|
||||
int stream_enabled;
|
||||
bitfield_word_t bits[];
|
||||
};
|
||||
|
||||
/** Allocate a bitset for a file of the given size, and chunks of the
|
||||
* given resolution.
|
||||
*/
|
||||
static inline struct bitset *bitset_alloc( uint64_t size, int resolution )
|
||||
{
|
||||
// calculate a size to allocate that is a multiple of the size of the
|
||||
// bitfield word
|
||||
size_t bitfield_size =
|
||||
BIT_WORDS_FOR_SIZE((( size + resolution - 1 ) / resolution)) * sizeof( bitfield_word_t );
|
||||
struct bitset *bitset = xmalloc(sizeof( struct bitset ) + ( bitfield_size / 8 ) );
|
||||
|
||||
bitset->size = size;
|
||||
bitset->resolution = resolution;
|
||||
/* don't actually need to call pthread_mutex_destroy '*/
|
||||
pthread_mutex_init(&bitset->lock, NULL);
|
||||
bitset->stream = xmalloc( sizeof( struct bitset_stream ) );
|
||||
pthread_mutex_init( &bitset->stream->mutex, NULL );
|
||||
|
||||
/* Technically don't need to call pthread_cond_destroy either */
|
||||
pthread_cond_init( &bitset->stream->cond_not_full, NULL );
|
||||
pthread_cond_init( &bitset->stream->cond_not_empty, NULL );
|
||||
|
||||
return bitset;
|
||||
}
|
||||
|
||||
static inline void bitset_free( struct bitset * set )
|
||||
{
|
||||
/* TODO: free our mutex... */
|
||||
|
||||
free( set->stream );
|
||||
set->stream = NULL;
|
||||
|
||||
free( set );
|
||||
}
|
||||
|
||||
/* Declares first / last / bitlen: the first and last chunk index touched by
 * the byte range ( from, len ), and the number of chunks in between. Expects
 * `set`, `from` and `len` to be in scope where it is expanded. */
#define INT_FIRST_AND_LAST \
	uint64_t first = from/set->resolution, \
		last = ((from+len)-1)/set->resolution, \
		bitlen = (last-first)+1

/* pthread_mutex_lock() / pthread_mutex_unlock() report failure by returning
 * a positive error number, never a negative value. Any non-zero result is
 * therefore mapped to -1 so that FATAL_IF_NEGATIVE actually fires. */
#define BITSET_LOCK \
	FATAL_IF_NEGATIVE( ( pthread_mutex_lock( &set->lock ) == 0 ) ? 0 : -1, "Error locking bitset" )

#define BITSET_UNLOCK \
	FATAL_IF_NEGATIVE( ( pthread_mutex_unlock( &set->lock ) == 0 ) ? 0 : -1, "Error unlocking bitset" )
|
||||
|
||||
|
||||
static inline void bitset_stream_enqueue(
|
||||
struct bitset * set,
|
||||
enum bitset_stream_events event,
|
||||
uint64_t from,
|
||||
uint64_t len
|
||||
)
|
||||
{
|
||||
struct bitset_stream * stream = set->stream;
|
||||
|
||||
pthread_mutex_lock( &stream->mutex );
|
||||
|
||||
while ( stream->size == BITSET_STREAM_SIZE ) {
|
||||
pthread_cond_wait( &stream->cond_not_full, &stream->mutex );
|
||||
}
|
||||
|
||||
stream->entries[stream->in].event = event;
|
||||
stream->entries[stream->in].from = from;
|
||||
stream->entries[stream->in].len = len;
|
||||
stream->queued_bytes[event] += len;
|
||||
|
||||
stream->size++;
|
||||
stream->in++;
|
||||
stream->in %= BITSET_STREAM_SIZE;
|
||||
|
||||
pthread_mutex_unlock( & stream->mutex );
|
||||
pthread_cond_signal( &stream->cond_not_empty );
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static inline void bitset_stream_dequeue(
|
||||
struct bitset * set,
|
||||
struct bitset_stream_entry * out
|
||||
)
|
||||
{
|
||||
struct bitset_stream * stream = set->stream;
|
||||
struct bitset_stream_entry * dequeued;
|
||||
|
||||
pthread_mutex_lock( &stream->mutex );
|
||||
|
||||
while ( stream->size == 0 ) {
|
||||
pthread_cond_wait( &stream->cond_not_empty, &stream->mutex );
|
||||
}
|
||||
|
||||
dequeued = &stream->entries[stream->out];
|
||||
|
||||
if ( out != NULL ) {
|
||||
out->event = dequeued->event;
|
||||
out->from = dequeued->from;
|
||||
out->len = dequeued->len;
|
||||
}
|
||||
|
||||
stream->queued_bytes[dequeued->event] -= dequeued->len;
|
||||
stream->size--;
|
||||
stream->out++;
|
||||
stream->out %= BITSET_STREAM_SIZE;
|
||||
|
||||
pthread_mutex_unlock( &stream->mutex );
|
||||
pthread_cond_signal( &stream->cond_not_full );
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static inline size_t bitset_stream_size( struct bitset * set )
|
||||
{
|
||||
size_t size;
|
||||
|
||||
pthread_mutex_lock( &set->stream->mutex );
|
||||
size = set->stream->size;
|
||||
pthread_mutex_unlock( &set->stream->mutex );
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
static inline uint64_t bitset_stream_queued_bytes(
|
||||
struct bitset * set,
|
||||
enum bitset_stream_events event
|
||||
)
|
||||
{
|
||||
uint64_t total;
|
||||
|
||||
pthread_mutex_lock( &set->stream->mutex );
|
||||
total = set->stream->queued_bytes[event];
|
||||
pthread_mutex_unlock( &set->stream->mutex );
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
static inline void bitset_enable_stream( struct bitset * set )
|
||||
{
|
||||
BITSET_LOCK;
|
||||
set->stream_enabled = 1;
|
||||
bitset_stream_enqueue( set, BITSET_STREAM_ON, 0, set->size );
|
||||
BITSET_UNLOCK;
|
||||
}
|
||||
|
||||
static inline void bitset_disable_stream( struct bitset * set )
|
||||
{
|
||||
BITSET_LOCK;
|
||||
bitset_stream_enqueue( set, BITSET_STREAM_OFF, 0, set->size );
|
||||
set->stream_enabled = 0;
|
||||
BITSET_UNLOCK;
|
||||
}
|
||||
|
||||
/** Set the bits in a bitset which correspond to the given bytes in the larger
|
||||
* file.
|
||||
*/
|
||||
static inline void bitset_set_range(
|
||||
struct bitset * set,
|
||||
uint64_t from,
|
||||
uint64_t len)
|
||||
{
|
||||
INT_FIRST_AND_LAST;
|
||||
BITSET_LOCK;
|
||||
bit_set_range(set->bits, first, bitlen);
|
||||
|
||||
if ( set->stream_enabled ) {
|
||||
bitset_stream_enqueue( set, BITSET_STREAM_SET, from, len );
|
||||
}
|
||||
|
||||
BITSET_UNLOCK;
|
||||
}
|
||||
|
||||
|
||||
/** Set every bit in the bitset. */
|
||||
static inline void bitset_set( struct bitset * set )
|
||||
{
|
||||
bitset_set_range(set, 0, set->size);
|
||||
}
|
||||
|
||||
/** Clear the bits in a bitset which correspond to the given bytes in the
|
||||
* larger file.
|
||||
*/
|
||||
static inline void bitset_clear_range(
|
||||
struct bitset * set,
|
||||
uint64_t from,
|
||||
uint64_t len)
|
||||
{
|
||||
INT_FIRST_AND_LAST;
|
||||
BITSET_LOCK;
|
||||
bit_clear_range(set->bits, first, bitlen);
|
||||
|
||||
if ( set->stream_enabled ) {
|
||||
bitset_stream_enqueue( set, BITSET_STREAM_UNSET, from, len );
|
||||
}
|
||||
|
||||
BITSET_UNLOCK;
|
||||
}
|
||||
|
||||
|
||||
/** Clear every bit in the bitset. */
|
||||
static inline void bitset_clear( struct bitset * set )
|
||||
{
|
||||
bitset_clear_range(set, 0, set->size);
|
||||
}
|
||||
|
||||
/** As per bitset_run_count but also tells you whether the run it found was set
|
||||
* or unset, atomically.
|
||||
*/
|
||||
static inline uint64_t bitset_run_count_ex(
|
||||
struct bitset * set,
|
||||
uint64_t from,
|
||||
uint64_t len,
|
||||
int* run_is_set
|
||||
)
|
||||
{
|
||||
uint64_t run;
|
||||
|
||||
/* Clip our requests to the end of the bitset, avoiding uint underflow. */
|
||||
if ( from > set->size ) {
|
||||
return 0;
|
||||
}
|
||||
len = ( len + from ) > set->size ? ( set->size - from ) : len;
|
||||
|
||||
INT_FIRST_AND_LAST;
|
||||
|
||||
BITSET_LOCK;
|
||||
run = bit_run_count(set->bits, first, bitlen, run_is_set) * set->resolution;
|
||||
run -= (from % set->resolution);
|
||||
BITSET_UNLOCK;
|
||||
|
||||
return run;
|
||||
}
|
||||
|
||||
/** Length, in bytes, of the run in the bit field that starts at `from`,
 * considering at most `len` bytes.
 *
 * Thin wrapper over bitset_run_count_ex() for callers that do not need to
 * know whether the run was set or clear.
 */
static inline uint64_t bitset_run_count(
	struct bitset * set,
	uint64_t from,
	uint64_t len)
{
	int * const ignore_state = NULL;

	return bitset_run_count_ex( set, from, len, ignore_state );
}
|
||||
|
||||
/** Tests whether the bit field is clear for the given file offset.
|
||||
*/
|
||||
static inline int bitset_is_clear_at( struct bitset * set, uint64_t at )
|
||||
{
|
||||
return bit_is_clear(set->bits, at/set->resolution);
|
||||
}
|
||||
|
||||
/** Tests whether the bit field is set for the given file offset.
|
||||
*/
|
||||
static inline int bitset_is_set_at( struct bitset * set, uint64_t at )
|
||||
{
|
||||
return bit_is_set(set->bits, at/set->resolution);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -1,12 +1,12 @@
|
||||
#include "client.h"
|
||||
#include "serve.h"
|
||||
#include "util.h"
|
||||
#include "ioutil.h"
|
||||
#include "sockutil.h"
|
||||
#include "util.h"
|
||||
#include "bitset.h"
|
||||
#include "nbdtypes.h"
|
||||
#include "self_pipe.h"
|
||||
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
@@ -16,25 +16,49 @@
|
||||
#include <fcntl.h>
|
||||
|
||||
|
||||
// When this signal is invoked, we call shutdown() on the client fd, which
|
||||
// results in the thread being wound up
|
||||
void client_killswitch_hit(int signal __attribute__ ((unused)), siginfo_t *info, void *ptr __attribute__ ((unused)))
|
||||
{
|
||||
int fd = info->si_value.sival_int;
|
||||
warn( "Killswitch for fd %i activated, calling shutdown on socket", fd );
|
||||
|
||||
|
||||
FATAL_IF(
|
||||
-1 == shutdown( fd, SHUT_RDWR ),
|
||||
SHOW_ERRNO( "Failed to shutdown() the socket, killing the server" )
|
||||
);
|
||||
}
|
||||
|
||||
struct client *client_create( struct server *serve, int socket )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
|
||||
struct client *c;
|
||||
struct sigevent evp = {
|
||||
.sigev_notify = SIGEV_SIGNAL,
|
||||
.sigev_signo = CLIENT_KILLSWITCH_SIGNAL
|
||||
};
|
||||
|
||||
c = xmalloc( sizeof( struct server ) );
|
||||
/*
|
||||
* Our killswitch closes this socket, forcing read() and write() calls
|
||||
* blocked on it to return with an error. The thread then close()s the
|
||||
* socket itself, avoiding races.
|
||||
*/
|
||||
evp.sigev_value.sival_int = socket;
|
||||
|
||||
c = xmalloc( sizeof( struct client ) );
|
||||
c->stopped = 0;
|
||||
c->socket = socket;
|
||||
c->serve = serve;
|
||||
|
||||
c->stop_signal = self_pipe_create();
|
||||
|
||||
c->entrusted = 0;
|
||||
FATAL_IF_NEGATIVE(
|
||||
timer_create( CLOCK_MONOTONIC, &evp, &(c->killswitch) ),
|
||||
SHOW_ERRNO( "Failed to create killswitch timer" )
|
||||
);
|
||||
|
||||
debug( "Alloced client %p (%d, %d)", c, c->stop_signal->read_fd, c->stop_signal->write_fd );
|
||||
debug( "Alloced client %p with socket %d", c, socket );
|
||||
return c;
|
||||
}
|
||||
|
||||
@@ -51,8 +75,14 @@ void client_destroy( struct client *client )
|
||||
{
|
||||
NULLCHECK( client );
|
||||
|
||||
FATAL_IF_NEGATIVE(
|
||||
timer_delete( client->killswitch ),
|
||||
SHOW_ERRNO( "Couldn't delete killswitch" )
|
||||
);
|
||||
|
||||
debug( "Destroying stop signal for client %p", client );
|
||||
self_pipe_destroy( client->stop_signal );
|
||||
debug( "Freeing client %p", client );
|
||||
free( client );
|
||||
}
|
||||
|
||||
@@ -61,7 +91,7 @@ void client_destroy( struct client *client )
|
||||
/**
|
||||
* So waiting on client->socket is len bytes of data, and we must write it all
|
||||
* to client->mapped. However, while doing so we must consult the bitmap
|
||||
* client->block_allocation_map, which is a bitmap where one bit represents
|
||||
* client->serve->allocation_map, which is a bitmap where one bit represents
|
||||
* block_allocation_resolution bytes. Where a bit isn't set, there are no
|
||||
* disc blocks allocated for that portion of the file, and we'd like to keep
|
||||
* it that way.
|
||||
@@ -70,11 +100,13 @@ void client_destroy( struct client *client )
|
||||
* allocated, we can proceed as normal and make one call to writeloop.
|
||||
*
|
||||
*/
|
||||
void write_not_zeroes(struct client* client, uint64_t from, int len)
|
||||
void write_not_zeroes(struct client* client, uint64_t from, uint64_t len)
|
||||
{
|
||||
NULLCHECK( client );
|
||||
NULLCHECK( client->serve );
|
||||
NULLCHECK( client->serve->allocation_map );
|
||||
|
||||
struct bitset_mapping *map = client->serve->allocation_map;
|
||||
struct bitset * map = client->serve->allocation_map;
|
||||
|
||||
while (len > 0) {
|
||||
/* so we have to calculate how much of our input to consider
|
||||
@@ -85,7 +117,7 @@ void write_not_zeroes(struct client* client, uint64_t from, int len)
|
||||
* and end to get the exact number of bytes.
|
||||
*/
|
||||
|
||||
int run = bitset_run_count(map, from, len);
|
||||
uint64_t run = bitset_run_count(map, from, len);
|
||||
|
||||
debug("write_not_zeroes: from=%ld, len=%d, run=%d", from, len, run);
|
||||
|
||||
@@ -94,7 +126,9 @@ void write_not_zeroes(struct client* client, uint64_t from, int len)
|
||||
debug("(run adjusted to %d)", run);
|
||||
}
|
||||
|
||||
if (0) /* useful but expensive */
|
||||
/*
|
||||
// Useful but expensive
|
||||
if (0)
|
||||
{
|
||||
uint64_t i;
|
||||
fprintf(stderr, "full map resolution=%d: ", map->resolution);
|
||||
@@ -107,6 +141,7 @@ void write_not_zeroes(struct client* client, uint64_t from, int len)
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
*/
|
||||
|
||||
#define DO_READ(dst, len) ERROR_IF_NEGATIVE( \
|
||||
readloop( \
|
||||
@@ -121,7 +156,12 @@ void write_not_zeroes(struct client* client, uint64_t from, int len)
|
||||
debug("writing the lot: from=%ld, run=%d", from, run);
|
||||
/* already allocated, just write it all */
|
||||
DO_READ(client->mapped + from, run);
|
||||
server_dirty(client->serve, from, run);
|
||||
/* We know from our earlier call to bitset_run_count that the
|
||||
* bitset is all-1s at this point, but we need to dirty it for the
|
||||
* sake of the event stream - the actual bytes have changed, and we
|
||||
* are interested in that fact.
|
||||
*/
|
||||
bitset_set_range( map, from, run );
|
||||
len -= run;
|
||||
from += run;
|
||||
}
|
||||
@@ -129,7 +169,7 @@ void write_not_zeroes(struct client* client, uint64_t from, int len)
|
||||
char zerobuffer[block_allocation_resolution];
|
||||
/* not allocated, read in block_allocation_resolution */
|
||||
while (run > 0) {
|
||||
int blockrun = block_allocation_resolution -
|
||||
uint64_t blockrun = block_allocation_resolution -
|
||||
(from % block_allocation_resolution);
|
||||
if (blockrun > run)
|
||||
blockrun = run;
|
||||
@@ -141,11 +181,13 @@ void write_not_zeroes(struct client* client, uint64_t from, int len)
|
||||
* and memcpy being fast, rather than try to
|
||||
* hand-optimized something specific.
|
||||
*/
|
||||
if (zerobuffer[0] != 0 ||
|
||||
memcmp(zerobuffer, zerobuffer + 1, blockrun - 1)) {
|
||||
|
||||
int all_zeros = (zerobuffer[0] == 0) &&
|
||||
(0 == memcmp( zerobuffer, zerobuffer+1, blockrun-1 ));
|
||||
|
||||
if ( !all_zeros ) {
|
||||
memcpy(client->mapped+from, zerobuffer, blockrun);
|
||||
bitset_set_range(map, from, blockrun);
|
||||
server_dirty(client->serve, from, blockrun);
|
||||
/* at this point we could choose to
|
||||
* short-cut the rest of the write for
|
||||
* faster I/O but by continuing to do it
|
||||
@@ -153,6 +195,10 @@ void write_not_zeroes(struct client* client, uint64_t from, int len)
|
||||
* sparseness as possible.
|
||||
*/
|
||||
}
|
||||
/* When the block is all_zeroes, no bytes have changed, so we
|
||||
* don't need to put an event into the bitset stream. This may
|
||||
* be surprising in the future.
|
||||
*/
|
||||
|
||||
len -= blockrun;
|
||||
run -= blockrun;
|
||||
@@ -177,31 +223,6 @@ int client_read_request( struct client * client , struct nbd_request *out_reques
|
||||
NULLCHECK( out_request );
|
||||
|
||||
struct nbd_request_raw request_raw;
|
||||
fd_set fds;
|
||||
struct timeval tv = {CLIENT_MAX_WAIT_SECS, 0};
|
||||
struct timeval * ptv;
|
||||
int fd_count;
|
||||
|
||||
/* We want a timeout if this is an inbound migration, but not
|
||||
* otherwise
|
||||
*/
|
||||
ptv = server_is_in_control( client->serve ) ? NULL : &tv;
|
||||
|
||||
FD_ZERO(&fds);
|
||||
FD_SET(client->socket, &fds);
|
||||
self_pipe_fd_set( client->stop_signal, &fds );
|
||||
fd_count = select(FD_SETSIZE, &fds, NULL, NULL, ptv);
|
||||
if ( fd_count == 0 ) {
|
||||
/* This "can't ever happen" */
|
||||
if ( NULL == ptv ) { fatal( "No FDs selected, and no timeout!" ); }
|
||||
else { error("Timed out waiting for I/O"); }
|
||||
}
|
||||
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
|
||||
|
||||
if ( self_pipe_fd_isset( client->stop_signal, &fds ) ){
|
||||
debug("Client received stop signal.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (fd_read_request(client->socket, &request_raw) == -1) {
|
||||
*disconnected = 1;
|
||||
@@ -228,22 +249,22 @@ int client_read_request( struct client * client , struct nbd_request *out_reques
|
||||
}
|
||||
|
||||
nbd_r2h_request( &request_raw, out_request );
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int fd_write_reply( int fd, char *handle, int error )
|
||||
int fd_write_reply( int fd, uint64_t handle, int error )
|
||||
{
|
||||
struct nbd_reply reply;
|
||||
struct nbd_reply_raw reply_raw;
|
||||
|
||||
reply.magic = REPLY_MAGIC;
|
||||
reply.error = error;
|
||||
memcpy( reply.handle, handle, 8 );
|
||||
reply.handle.w = handle;
|
||||
|
||||
nbd_h2r_reply( &reply, &reply_raw );
|
||||
debug( "Replying with handle=0x%08X, error=%"PRIu32, handle, error );
|
||||
|
||||
if( -1 == write( fd, &reply_raw, sizeof( reply_raw ) ) ) {
|
||||
if( -1 == writeloop( fd, &reply_raw, sizeof( reply_raw ) ) ) {
|
||||
switch( errno ) {
|
||||
case ECONNRESET:
|
||||
error( "Connection reset while writing reply" );
|
||||
@@ -270,7 +291,7 @@ int fd_write_reply( int fd, char *handle, int error )
|
||||
*/
|
||||
int client_write_reply( struct client * client, struct nbd_request *request, int error )
|
||||
{
|
||||
return fd_write_reply( client->socket, request->handle, error);
|
||||
return fd_write_reply( client->socket, request->handle.w, error);
|
||||
}
|
||||
|
||||
|
||||
@@ -279,7 +300,7 @@ void client_write_init( struct client * client, uint64_t size )
|
||||
struct nbd_init init = {{0}};
|
||||
struct nbd_init_raw init_raw = {{0}};
|
||||
|
||||
memcpy( init.passwd, INIT_PASSWD, sizeof( INIT_PASSWD ) );
|
||||
memcpy( init.passwd, INIT_PASSWD, sizeof( init.passwd ) );
|
||||
init.magic = INIT_MAGIC;
|
||||
init.size = size;
|
||||
memset( init.reserved, 0, 128 );
|
||||
@@ -341,46 +362,46 @@ void client_flush( struct client * client, size_t len )
|
||||
* Returns 1 if we do, 0 otherwise.
|
||||
* request_err is set to 0 if the client sent a bad request, in which
|
||||
* case we drop the connection.
|
||||
* FIXME: after an ENTRUST, there's no way to distinguish between a
|
||||
* DISCONNECT and any bad request.
|
||||
*/
|
||||
int client_request_needs_reply( struct client * client,
|
||||
struct nbd_request request )
|
||||
{
|
||||
debug("request type %d", request.type);
|
||||
|
||||
/* The client is stupid, but don't take down the whole server as a result.
|
||||
* We send a reply before disconnecting so that at least some indication of
|
||||
* the problem is visible, and so proxies don't retry the same (bad) request
|
||||
* forever.
|
||||
*/
|
||||
if (request.magic != REQUEST_MAGIC) {
|
||||
fatal("Bad magic %08x", request.magic);
|
||||
warn("Bad magic 0x%08X from client", request.magic);
|
||||
client_write_reply( client, &request, EBADMSG );
|
||||
client->disconnect = 1; // no need to flush
|
||||
return 0;
|
||||
}
|
||||
|
||||
debug(
|
||||
"request type=%"PRIu32", from=%"PRIu64", len=%"PRIu32", handle=0x%08X",
|
||||
request.type, request.from, request.len, request.handle
|
||||
);
|
||||
|
||||
/* check it's not out of range */
|
||||
if ( request.from+request.len > client->serve->size) {
|
||||
warn("write request %"PRIu64"+%"PRIu32" out of range",
|
||||
request.from, request.len
|
||||
);
|
||||
if ( request.type == REQUEST_WRITE ) {
|
||||
client_flush( client, request.len );
|
||||
}
|
||||
client_write_reply( client, &request, EPERM ); /* TODO: Change to ERANGE ? */
|
||||
client->disconnect = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
switch (request.type)
|
||||
{
|
||||
case REQUEST_READ:
|
||||
ERROR_IF( client->entrusted,
|
||||
"Received a read request "
|
||||
"after an entrust message.");
|
||||
break;
|
||||
case REQUEST_WRITE:
|
||||
ERROR_IF( client->entrusted,
|
||||
"Received a write request "
|
||||
"after an entrust message.");
|
||||
/* check it's not out of range */
|
||||
if ( request.from+request.len > client->serve->size) {
|
||||
warn("write request %d+%d out of range",
|
||||
request.from,
|
||||
request.len
|
||||
);
|
||||
client_write_reply( client, &request, 1 );
|
||||
client_flush( client, request.len );
|
||||
client->disconnect = 0;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case REQUEST_ENTRUST:
|
||||
/* Yes, we need to reply to an entrust, but we take no
|
||||
* further action */
|
||||
debug("request entrust");
|
||||
break;
|
||||
case REQUEST_DISCONNECT:
|
||||
debug("request disconnect");
|
||||
@@ -388,30 +409,18 @@ int client_request_needs_reply( struct client * client,
|
||||
return 0;
|
||||
|
||||
default:
|
||||
fatal("Unknown request %08x", request.type);
|
||||
fatal("Unknown request 0x%08X", request.type);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
void client_reply_to_entrust( struct client * client, struct nbd_request request )
|
||||
{
|
||||
/* An entrust needs a response, but has no data. */
|
||||
debug( "request entrust" );
|
||||
|
||||
client_write_reply( client, &request, 0 );
|
||||
/* We set this after trying to send the reply, so we know the
|
||||
* reply got away safely.
|
||||
*/
|
||||
client->entrusted = 1;
|
||||
}
|
||||
|
||||
|
||||
void client_reply_to_read( struct client* client, struct nbd_request request )
|
||||
{
|
||||
off64_t offset;
|
||||
|
||||
debug("request read %ld+%d", request.from, request.len);
|
||||
sock_set_tcp_cork( client->socket, 1 );
|
||||
client_write_reply( client, &request, 0 );
|
||||
|
||||
offset = request.from;
|
||||
@@ -428,18 +437,20 @@ void client_reply_to_read( struct client* client, struct nbd_request request )
|
||||
"sendfile failed from=%ld, len=%d",
|
||||
offset,
|
||||
request.len);
|
||||
|
||||
sock_set_tcp_cork( client->socket, 0 );
|
||||
}
|
||||
|
||||
|
||||
void client_reply_to_write( struct client* client, struct nbd_request request )
|
||||
{
|
||||
debug("request write %ld+%d", request.from, request.len);
|
||||
if (client->serve->allocation_map) {
|
||||
debug("request write from=%"PRIu64", len=%"PRIu32", handle=0x%08X", request.from, request.len, request.handle);
|
||||
if (client->serve->allocation_map_built) {
|
||||
write_not_zeroes( client, request.from, request.len );
|
||||
}
|
||||
else {
|
||||
debug("No allocation map, writing directly.");
|
||||
/* If we get cut off partway through reading this data
|
||||
/* If we get cut off partway through reading this data:
|
||||
* */
|
||||
ERROR_IF_NEGATIVE(
|
||||
readloop( client->socket,
|
||||
@@ -449,7 +460,12 @@ void client_reply_to_write( struct client* client, struct nbd_request request )
|
||||
request.from,
|
||||
request.len
|
||||
);
|
||||
server_dirty(client->serve, request.from, request.len);
|
||||
|
||||
/* the allocation_map is shared between client threads, and may be
|
||||
* being built. We need to reflect the write in it, as it may be in
|
||||
* a position the builder has already gone over.
|
||||
*/
|
||||
bitset_set_range(client->serve->allocation_map, request.from, request.len);
|
||||
}
|
||||
|
||||
if (1) /* not sure whether this is necessary... */
|
||||
@@ -461,7 +477,7 @@ void client_reply_to_write( struct client* client, struct nbd_request request )
|
||||
FATAL_IF_NEGATIVE(
|
||||
msync( client->mapped + from_rounded,
|
||||
len_rounded,
|
||||
MS_SYNC),
|
||||
MS_SYNC | MS_INVALIDATE),
|
||||
"msync failed %ld %ld", request.from, request.len
|
||||
);
|
||||
}
|
||||
@@ -478,36 +494,135 @@ void client_reply( struct client* client, struct nbd_request request )
|
||||
case REQUEST_WRITE:
|
||||
client_reply_to_write( client, request );
|
||||
break;
|
||||
case REQUEST_ENTRUST:
|
||||
client_reply_to_entrust( client, request );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Starts a timer that will kill the whole process if disarm is not called
|
||||
* within a timeout (see CLIENT_HANDLE_TIMEOUT).
|
||||
*/
|
||||
void client_arm_killswitch( struct client* client )
|
||||
{
|
||||
struct itimerspec its = {
|
||||
.it_value = { .tv_nsec = 0, .tv_sec = CLIENT_HANDLER_TIMEOUT },
|
||||
.it_interval = { .tv_nsec = 0, .tv_sec = 0 }
|
||||
};
|
||||
|
||||
if ( !client->serve->use_killswitch ) {
|
||||
return;
|
||||
}
|
||||
|
||||
debug( "Arming killswitch" );
|
||||
|
||||
FATAL_IF_NEGATIVE(
|
||||
timer_settime( client->killswitch, 0, &its, NULL ),
|
||||
SHOW_ERRNO( "Failed to arm killswitch" )
|
||||
);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void client_disarm_killswitch( struct client* client )
|
||||
{
|
||||
struct itimerspec its = {
|
||||
.it_value = { .tv_nsec = 0, .tv_sec = 0 },
|
||||
.it_interval = { .tv_nsec = 0, .tv_sec = 0 }
|
||||
};
|
||||
|
||||
if ( !client->serve->use_killswitch ) {
|
||||
return;
|
||||
}
|
||||
|
||||
debug( "Disarming killswitch" );
|
||||
|
||||
FATAL_IF_NEGATIVE(
|
||||
timer_settime( client->killswitch, 0, &its, NULL ),
|
||||
SHOW_ERRNO( "Failed to disarm killswitch" )
|
||||
);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* Returns 0 if we should continue trying to serve requests */
|
||||
int client_serve_request(struct client* client)
|
||||
{
|
||||
struct nbd_request request = {0};
|
||||
int failure = 1;
|
||||
int stop = 1;
|
||||
int disconnected = 0;
|
||||
fd_set rfds, efds;
|
||||
int fd_count;
|
||||
|
||||
/* wait until there are some bytes on the fd before committing to reads
|
||||
* FIXME: this whole scheme is broken because we're using blocking reads.
|
||||
* read() can block directly after a select anyway, and it's possible that,
|
||||
* without the killswitch, we'd hang forever. With the killswitch, we just
|
||||
* hang for "a while". The Right Thing to do is to rewrite client.c to be
|
||||
* non-blocking.
|
||||
*/
|
||||
|
||||
FD_ZERO( &rfds );
|
||||
FD_SET( client->socket, &rfds );
|
||||
self_pipe_fd_set( client->stop_signal, &rfds );
|
||||
|
||||
FD_ZERO( &efds );
|
||||
FD_SET( client->socket, &efds );
|
||||
|
||||
fd_count = sock_try_select( FD_SETSIZE, &rfds, NULL, &efds, NULL );
|
||||
|
||||
if ( fd_count == 0 ) {
|
||||
/* This "can't ever happen" */
|
||||
fatal( "No FDs selected, and no timeout!" );
|
||||
}
|
||||
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
|
||||
|
||||
if ( self_pipe_fd_isset( client->stop_signal, &rfds ) ){
|
||||
debug("Client received stop signal.");
|
||||
return 1; // Don't try to serve more requests
|
||||
}
|
||||
|
||||
if ( FD_ISSET( client->socket, &efds ) ) {
|
||||
debug( "Client connection closed" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* We arm / disarm around the whole request cycle. The reason for this is
|
||||
* that the remote peer could uncleanly die at any point; if we're stuck on
|
||||
* a blocking read(), then that will hang for (almost) forever. This is bad
|
||||
* in general, makes the server respond only to kill -9, and breaks
|
||||
* outward mirroring in a most unpleasant way.
|
||||
*
|
||||
* Don't forget to disarm before exiting, no matter what!
|
||||
*
|
||||
* The replication is simple: open a connection to the flexnbd server, write
|
||||
* a single byte, and then wait.
|
||||
*
|
||||
*/
|
||||
client_arm_killswitch( client );
|
||||
|
||||
if ( !client_read_request( client, &request, &disconnected ) ) {
|
||||
client_disarm_killswitch( client );
|
||||
return stop;
|
||||
}
|
||||
if ( disconnected ) {
|
||||
client_disarm_killswitch( client );
|
||||
return stop;
|
||||
}
|
||||
|
||||
if ( !client_read_request( client, &request, &disconnected ) ) { return failure; }
|
||||
if ( disconnected ) { return failure; }
|
||||
if ( !client_request_needs_reply( client, request ) ) {
|
||||
client_disarm_killswitch( client );
|
||||
return client->disconnect;
|
||||
}
|
||||
|
||||
server_lock_io( client->serve );
|
||||
{
|
||||
if ( !server_is_closed( client->serve ) ) {
|
||||
client_reply( client, request );
|
||||
failure = 0;
|
||||
stop = 0;
|
||||
}
|
||||
}
|
||||
server_unlock_io( client->serve );
|
||||
|
||||
return failure;
|
||||
client_disarm_killswitch( client );
|
||||
return stop;
|
||||
}
|
||||
|
||||
|
||||
@@ -521,13 +636,27 @@ void client_cleanup(struct client* client,
|
||||
{
|
||||
info("client cleanup for client %p", client);
|
||||
|
||||
if (client->socket) { close(client->socket); }
|
||||
/* If the thread hits an error, we need to ensure this is off */
|
||||
client_disarm_killswitch( client );
|
||||
|
||||
if (client->socket) {
|
||||
FATAL_IF_NEGATIVE( close(client->socket),
|
||||
"Error closing client socket %d",
|
||||
client->socket );
|
||||
debug("Closed client socket fd %d", client->socket);
|
||||
client->socket = -1;
|
||||
}
|
||||
if (client->mapped) {
|
||||
munmap(client->mapped, client->serve->size);
|
||||
}
|
||||
if (client->fileno) { close(client->fileno); }
|
||||
if (client->fileno) {
|
||||
FATAL_IF_NEGATIVE( close(client->fileno),
|
||||
"Error closing file %d",
|
||||
client->fileno );
|
||||
debug("Closed client file fd %d", client->fileno );
|
||||
client->fileno = -1;
|
||||
}
|
||||
|
||||
if ( server_io_locked( client->serve ) ) { server_unlock_io( client->serve ); }
|
||||
if ( server_acl_locked( client->serve ) ) { server_unlock_acl( client->serve ); }
|
||||
|
||||
}
|
||||
@@ -548,6 +677,13 @@ void* client_serve(void* client_uncast)
|
||||
),
|
||||
"Couldn't open/mmap file %s: %s", client->serve->filename, strerror( errno )
|
||||
);
|
||||
|
||||
FATAL_IF_NEGATIVE(
|
||||
madvise( client->mapped, client->serve->size, MADV_RANDOM ),
|
||||
SHOW_ERRNO( "Failed to madvise() %s", client->serve->filename )
|
||||
);
|
||||
|
||||
debug( "Opened client file fd %d", client->fileno);
|
||||
debug("client: sending hello");
|
||||
client_send_hello(client);
|
||||
|
||||
@@ -557,21 +693,10 @@ void* client_serve(void* client_uncast)
|
||||
debug("client: stopped serving requests");
|
||||
client->stopped = 1;
|
||||
|
||||
if ( client->entrusted ) {
|
||||
if ( client->disconnect ){
|
||||
debug("client: control arrived" );
|
||||
server_control_arrived( client->serve );
|
||||
}
|
||||
else {
|
||||
warn( "client: control transfer failed." );
|
||||
}
|
||||
}
|
||||
|
||||
FATAL_IF_NEGATIVE(
|
||||
close(client->socket),
|
||||
"Couldn't close socket %d",
|
||||
client->socket
|
||||
);
|
||||
|
||||
debug("Cleaning client %p up normally in thread %p", client, pthread_self());
|
||||
client_cleanup(client, 0);
|
||||
@@ -579,3 +704,4 @@ void* client_serve(void* client_uncast)
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@@ -1,13 +1,20 @@
|
||||
#ifndef CLIENT_H
|
||||
#define CLIENT_H
|
||||
|
||||
/** CLIENT_MAX_WAIT_SECS
|
||||
* This is the length of time an inbound migration will wait for a fresh
|
||||
* write before assuming the source has Gone Away. Note: it is *not*
|
||||
* the time from one write to the next, it is the gap between the end of
|
||||
* one write and the start of the next.
|
||||
#include <signal.h>
|
||||
#include <time.h>
|
||||
|
||||
/** CLIENT_HANDLER_TIMEOUT
|
||||
* This is the length of time (in seconds) any request can be outstanding for.
|
||||
* If we spend longer than this in a request, the whole server is killed.
|
||||
*/
|
||||
#define CLIENT_MAX_WAIT_SECS 5
|
||||
#define CLIENT_HANDLER_TIMEOUT 120
|
||||
|
||||
/** CLIENT_KILLSWITCH_SIGNAL
|
||||
* The signal number we use to kill the server when *any* killswitch timer
|
||||
* fires. The handler gets the fd of the client socket to work with.
|
||||
*/
|
||||
#define CLIENT_KILLSWITCH_SIGNAL ( SIGRTMIN + 1 )
|
||||
|
||||
|
||||
struct client {
|
||||
@@ -28,13 +35,17 @@ struct client {
|
||||
|
||||
struct server* serve; /* FIXME: remove above duplication */
|
||||
|
||||
/* Have we seen a REQUEST_ENTRUST message? */
|
||||
int entrusted;
|
||||
|
||||
/* Have we seen a REQUEST_DISCONNECT message? */
|
||||
int disconnect;
|
||||
|
||||
/* kill the whole server if a request has been outstanding too long,
|
||||
* assuming use_killswitch is set in serve
|
||||
*/
|
||||
timer_t killswitch;
|
||||
|
||||
};
|
||||
|
||||
void client_killswitch_hit(int signal, siginfo_t *info, void *ptr);
|
||||
|
||||
void* client_serve(void* client_uncast);
|
||||
struct client * client_create( struct server * serve, int socket );
|
||||
@@ -42,3 +53,4 @@ void client_destroy( struct client * client );
|
||||
void client_signal_stop( struct client * client );
|
||||
|
||||
#endif
|
||||
|
@@ -54,6 +54,7 @@ struct control * control_create(
|
||||
|
||||
control->flexnbd = flexnbd;
|
||||
control->socket_name = csn;
|
||||
control->open_signal = self_pipe_create();
|
||||
control->close_signal = self_pipe_create();
|
||||
control->mirror_state_mbox = mbox_create();
|
||||
|
||||
@@ -75,6 +76,7 @@ void control_destroy( struct control * control )
|
||||
|
||||
mbox_destroy( control->mirror_state_mbox );
|
||||
self_pipe_destroy( control->close_signal );
|
||||
self_pipe_destroy( control->open_signal );
|
||||
free( control );
|
||||
}
|
||||
|
||||
@@ -205,10 +207,23 @@ void control_listen(struct control* control)
|
||||
control->control_fd = open_control_socket( control->socket_name );
|
||||
}
|
||||
|
||||
void control_wait_for_open_signal( struct control * control )
|
||||
{
|
||||
fd_set fds;
|
||||
FD_ZERO( &fds );
|
||||
self_pipe_fd_set( control->open_signal, &fds );
|
||||
FATAL_IF_NEGATIVE( select( FD_SETSIZE, &fds, NULL, NULL, NULL ),
|
||||
"select() failed" );
|
||||
|
||||
self_pipe_signal_clear( control->open_signal );
|
||||
}
|
||||
|
||||
|
||||
void control_serve( struct control * control )
|
||||
{
|
||||
NULLCHECK( control );
|
||||
|
||||
control_wait_for_open_signal( control );
|
||||
control_listen( control );
|
||||
while( control_accept( control ) );
|
||||
}
|
||||
@@ -235,7 +250,7 @@ void * control_runner( void * control_uncast )
|
||||
control_serve( control );
|
||||
|
||||
control_cleanup( control, 0 );
|
||||
return NULL;
|
||||
pthread_exit( NULL );
|
||||
}
|
||||
|
||||
|
||||
@@ -260,6 +275,9 @@ void control_write_mirror_response( enum mirror_state mirror_state, int client_f
|
||||
case MS_FAIL_SIZE_MISMATCH:
|
||||
write_socket( "1: Remote size does not match local size" );
|
||||
break;
|
||||
case MS_ABANDONED:
|
||||
write_socket( "1: Mirroring abandoned" );
|
||||
break;
|
||||
case MS_GO:
|
||||
case MS_DONE: /* Yes, I know we know better, but it's simpler this way */
|
||||
write_socket( "0: Mirror started" );
|
||||
@@ -292,7 +310,6 @@ enum mirror_state control_client_mirror_wait(
|
||||
return mirror_state;
|
||||
}
|
||||
|
||||
|
||||
#define write_socket(msg) write(client->socket, (msg "\n"), strlen((msg))+1)
|
||||
/** Command parser to start mirror process from socket input */
|
||||
int control_mirror(struct control_client* client, int linesc, char** lines)
|
||||
@@ -302,7 +319,7 @@ int control_mirror(struct control_client* client, int linesc, char** lines)
|
||||
struct flexnbd * flexnbd = client->flexnbd;
|
||||
union mysockaddr *connect_to = xmalloc( sizeof( union mysockaddr ) );
|
||||
union mysockaddr *connect_from = NULL;
|
||||
uint64_t max_Bps = 0;
|
||||
uint64_t max_Bps = UINT64_MAX;
|
||||
int action_at_finish;
|
||||
int raw_port;
|
||||
|
||||
@@ -324,22 +341,15 @@ int control_mirror(struct control_client* client, int linesc, char** lines)
|
||||
}
|
||||
connect_to->v4.sin_port = htobe16(raw_port);
|
||||
|
||||
action_at_finish = ACTION_EXIT;
|
||||
if (linesc > 2) {
|
||||
connect_from = xmalloc( sizeof( union mysockaddr ) );
|
||||
if (parse_ip_to_sockaddr(&connect_from->generic, lines[2]) == 0) {
|
||||
write_socket("1: bad bind address");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (linesc > 3) { max_Bps = atoi(lines[2]); }
|
||||
|
||||
action_at_finish = ACTION_EXIT;
|
||||
if (linesc > 4) {
|
||||
if (strcmp("exit", lines[3]) == 0) {
|
||||
if (strcmp("exit", lines[2]) == 0) {
|
||||
action_at_finish = ACTION_EXIT;
|
||||
}
|
||||
else if (strcmp("nothing", lines[3]) == 0) {
|
||||
else if (strcmp( "unlink", lines[2]) == 0 ) {
|
||||
action_at_finish = ACTION_UNLINK;
|
||||
}
|
||||
else if (strcmp("nothing", lines[2]) == 0) {
|
||||
action_at_finish = ACTION_NOTHING;
|
||||
}
|
||||
else {
|
||||
@@ -348,19 +358,37 @@ int control_mirror(struct control_client* client, int linesc, char** lines)
|
||||
}
|
||||
}
|
||||
|
||||
if (linesc > 3) {
|
||||
connect_from = xmalloc( sizeof( union mysockaddr ) );
|
||||
if (parse_ip_to_sockaddr(&connect_from->generic, lines[3]) == 0) {
|
||||
write_socket("1: bad bind address");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (linesc > 4) {
|
||||
errno = 0;
|
||||
max_Bps = strtoull( lines[4], NULL, 10 );
|
||||
if ( errno == ERANGE ) {
|
||||
write_socket( "1: max_bps out of range" );
|
||||
return -1;
|
||||
} else if ( errno != 0 ) {
|
||||
write_socket( "1: max_bps couldn't be parsed" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (linesc > 5) {
|
||||
write_socket("1: unrecognised parameters to mirror");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* In theory, we should never have to worry about the switch
|
||||
* lock here, since we should never be able to start more than
|
||||
* one mirror at a time. This is enforced by only accepting a
|
||||
* single client at a time on the control socket.
|
||||
*/
|
||||
flexnbd_lock_switch( flexnbd );
|
||||
{
|
||||
struct server * serve = flexnbd_server(flexnbd);
|
||||
|
||||
server_lock_start_mirror( serve );
|
||||
{
|
||||
if ( server_mirror_can_start( serve ) ) {
|
||||
serve->mirror_super = mirror_super_create(
|
||||
serve->filename,
|
||||
connect_to,
|
||||
@@ -369,7 +397,24 @@ int control_mirror(struct control_client* client, int linesc, char** lines)
|
||||
action_at_finish,
|
||||
client->mirror_state_mbox );
|
||||
serve->mirror = serve->mirror_super->mirror;
|
||||
server_prevent_mirror_start( serve );
|
||||
} else {
|
||||
if ( serve->mirror_super ) {
|
||||
warn( "Tried to start a second mirror run" );
|
||||
write_socket( "1: mirror already running" );
|
||||
} else {
|
||||
warn( "Cannot start mirroring, shutting down" );
|
||||
write_socket( "1: shutting down" );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
server_unlock_start_mirror( serve );
|
||||
|
||||
/* Do this outside the lock to minimise the length of time the
|
||||
* sighandler can block the serve thread
|
||||
*/
|
||||
if ( serve->mirror_super ) {
|
||||
FATAL_IF( 0 != pthread_create(
|
||||
&serve->mirror_super->thread,
|
||||
NULL,
|
||||
@@ -385,15 +430,47 @@ int control_mirror(struct control_client* client, int linesc, char** lines)
|
||||
debug("Control thread writing response");
|
||||
control_write_mirror_response( state, client->socket );
|
||||
}
|
||||
debug( "Control thread unlocking switch" );
|
||||
flexnbd_unlock_switch( flexnbd );
|
||||
|
||||
debug( "Control thread going away." );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#undef write_socket
|
||||
int control_mirror_max_bps( struct control_client* client, int linesc, char** lines )
|
||||
{
|
||||
NULLCHECK( client );
|
||||
NULLCHECK( client->flexnbd );
|
||||
|
||||
struct server* serve = flexnbd_server( client->flexnbd );
|
||||
uint64_t max_Bps;
|
||||
|
||||
if ( !serve->mirror_super ) {
|
||||
write_socket( "1: Not currently mirroring" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ( linesc != 1 ) {
|
||||
write_socket( "1: Bad format" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
errno = 0;
|
||||
max_Bps = strtoull( lines[0], NULL, 10 );
|
||||
if ( errno == ERANGE ) {
|
||||
write_socket( "1: max_bps out of range" );
|
||||
return -1;
|
||||
} else if ( errno != 0 ) {
|
||||
write_socket( "1: max_bps couldn't be parsed" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
serve->mirror->max_bytes_per_second = max_Bps;
|
||||
write_socket( "0: updated" );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#undef write_socket
|
||||
|
||||
/** Command parser to alter access control list from socket input */
|
||||
int control_acl(struct control_client* client, int linesc, char** lines)
|
||||
@@ -406,6 +483,7 @@ int control_acl(struct control_client* client, int linesc, char** lines)
|
||||
struct acl * new_acl = acl_create( linesc, lines, default_deny );
|
||||
|
||||
if (new_acl->len != linesc) {
|
||||
warn("Bad ACL spec: %s", lines[new_acl->len] );
|
||||
write(client->socket, "1: bad spec: ", 13);
|
||||
write(client->socket, lines[new_acl->len],
|
||||
strlen(lines[new_acl->len]));
|
||||
@@ -414,12 +492,59 @@ int control_acl(struct control_client* client, int linesc, char** lines)
|
||||
}
|
||||
else {
|
||||
flexnbd_replace_acl( flexnbd, new_acl );
|
||||
write( client->socket, "0: updated", 10);
|
||||
info("ACL set");
|
||||
write( client->socket, "0: updated\n", 11);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int control_break(
|
||||
struct control_client* client,
|
||||
int linesc __attribute__ ((unused)),
|
||||
char** lines __attribute__((unused))
|
||||
)
|
||||
{
|
||||
NULLCHECK( client );
|
||||
NULLCHECK( client->flexnbd );
|
||||
|
||||
int result = 0;
|
||||
struct flexnbd* flexnbd = client->flexnbd;
|
||||
|
||||
struct server * serve = flexnbd_server( flexnbd );
|
||||
|
||||
server_lock_start_mirror( serve );
|
||||
{
|
||||
if ( server_is_mirroring( serve ) ) {
|
||||
|
||||
info( "Signaling to abandon mirror" );
|
||||
server_abandon_mirror( serve );
|
||||
debug( "Abandon signaled" );
|
||||
|
||||
if ( server_is_closed( serve ) ) {
|
||||
info( "Mirror completed while canceling" );
|
||||
write( client->socket,
|
||||
"1: mirror completed\n", 20 );
|
||||
}
|
||||
else {
|
||||
info( "Mirror successfully stopped." );
|
||||
write( client->socket,
|
||||
"0: mirror stopped\n", 18 );
|
||||
result = 1;
|
||||
}
|
||||
|
||||
} else {
|
||||
warn( "Not mirroring." );
|
||||
write( client->socket, "1: not mirroring\n", 17 );
|
||||
}
|
||||
}
|
||||
server_unlock_start_mirror( serve );
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/** FIXME: add some useful statistics */
|
||||
int control_status(
|
||||
struct control_client* client,
|
||||
@@ -444,9 +569,7 @@ void control_client_cleanup(struct control_client* client,
|
||||
if (client->socket) { close(client->socket); }
|
||||
|
||||
/* This is wrongness */
|
||||
if ( server_io_locked( client->flexnbd->serve ) ) { server_unlock_io( client->flexnbd->serve ); }
|
||||
if ( server_acl_locked( client->flexnbd->serve ) ) { server_unlock_acl( client->flexnbd->serve ); }
|
||||
if ( flexnbd_switch_locked( client->flexnbd ) ) { flexnbd_unlock_switch( client->flexnbd ); }
|
||||
|
||||
control_client_destroy( client );
|
||||
}
|
||||
@@ -478,11 +601,22 @@ void control_respond(struct control_client * client)
|
||||
debug("mirror command failed");
|
||||
}
|
||||
}
|
||||
else if (strcmp(lines[0], "break") == 0) {
|
||||
info( "break command received" );
|
||||
if ( control_break( client, linesc-1, lines+1) < 0) {
|
||||
debug( "break command failed" );
|
||||
}
|
||||
}
|
||||
else if (strcmp(lines[0], "status") == 0) {
|
||||
info("status command received" );
|
||||
if (control_status(client, linesc-1, lines+1) < 0) {
|
||||
debug("status command failed");
|
||||
}
|
||||
} else if ( strcmp( lines[0], "mirror_max_bps" ) == 0 ) {
|
||||
info( "mirror_max_bps command received" );
|
||||
if( control_mirror_max_bps( client, linesc-1, lines+1 ) < 0 ) {
|
||||
debug( "mirror_max_bps command failed" );
|
||||
}
|
||||
}
|
||||
else {
|
||||
write(client->socket, "10: unknown command\n", 23);
|
@@ -1,10 +1,14 @@
|
||||
#ifndef CONTROL_H
|
||||
#define CONTROL_H
|
||||
|
||||
/* We need this to avoid a complaint about struct server * in
|
||||
* void accept_control_connection
|
||||
*/
|
||||
struct server;
|
||||
|
||||
#include "parse.h"
|
||||
#include "mirror.h"
|
||||
#include "control.h"
|
||||
#include "serve.h"
|
||||
#include "flexnbd.h"
|
||||
#include "mbox.h"
|
||||
|
||||
@@ -15,6 +19,7 @@ struct control {
|
||||
|
||||
pthread_t thread;
|
||||
|
||||
struct self_pipe * open_signal;
|
||||
struct self_pipe * close_signal;
|
||||
|
||||
/* This is owned by the control object, and used by a
|
@@ -21,7 +21,6 @@
|
||||
|
||||
#include "flexnbd.h"
|
||||
#include "serve.h"
|
||||
#include "listen.h"
|
||||
#include "util.h"
|
||||
#include "control.h"
|
||||
#include "status.h"
|
||||
@@ -76,8 +75,6 @@ void flexnbd_create_shared(
|
||||
}
|
||||
|
||||
flexnbd->signal_fd = flexnbd_build_signal_fd();
|
||||
|
||||
flexnbd->switch_mutex = flexthread_mutex_create();
|
||||
}
|
||||
|
||||
|
||||
@@ -89,7 +86,8 @@ struct flexnbd * flexnbd_create_serving(
|
||||
int default_deny,
|
||||
int acl_entries,
|
||||
char** s_acl_entries,
|
||||
int max_nbd_clients)
|
||||
int max_nbd_clients,
|
||||
int use_killswitch)
|
||||
{
|
||||
struct flexnbd * flexnbd = xmalloc( sizeof( struct flexnbd ) );
|
||||
flexnbd->serve = server_create(
|
||||
@@ -101,42 +99,53 @@ struct flexnbd * flexnbd_create_serving(
|
||||
acl_entries,
|
||||
s_acl_entries,
|
||||
max_nbd_clients,
|
||||
use_killswitch,
|
||||
1);
|
||||
flexnbd_create_shared( flexnbd, s_ctrl_sock );
|
||||
|
||||
// Beats installing one handler per client instance
|
||||
if ( use_killswitch ) {
|
||||
struct sigaction act = {
|
||||
.sa_sigaction = client_killswitch_hit,
|
||||
.sa_flags = SA_RESTART | SA_SIGINFO
|
||||
};
|
||||
|
||||
FATAL_UNLESS(
|
||||
0 == sigaction( CLIENT_KILLSWITCH_SIGNAL, &act, NULL ),
|
||||
"Installing client killswitch signal failed"
|
||||
);
|
||||
}
|
||||
|
||||
return flexnbd;
|
||||
}
|
||||
|
||||
|
||||
struct flexnbd * flexnbd_create_listening(
|
||||
char* s_ip_address,
|
||||
char* s_rebind_ip_address,
|
||||
char* s_port,
|
||||
char* s_rebind_port,
|
||||
char* s_file,
|
||||
char* s_ctrl_sock,
|
||||
int default_deny,
|
||||
int acl_entries,
|
||||
char** s_acl_entries,
|
||||
int max_nbd_clients )
|
||||
char** s_acl_entries )
|
||||
{
|
||||
struct flexnbd * flexnbd = xmalloc( sizeof( struct flexnbd ) );
|
||||
flexnbd->listen = listen_create(
|
||||
flexnbd->serve = server_create(
|
||||
flexnbd,
|
||||
s_ip_address,
|
||||
s_rebind_ip_address,
|
||||
s_port,
|
||||
s_rebind_port,
|
||||
s_file,
|
||||
default_deny,
|
||||
acl_entries,
|
||||
s_acl_entries,
|
||||
max_nbd_clients);
|
||||
flexnbd->serve = flexnbd->listen->init_serve;
|
||||
1, 0, 0);
|
||||
flexnbd_create_shared( flexnbd, s_ctrl_sock );
|
||||
|
||||
// listen can't use killswitch, as mirror may pause on sending things
|
||||
// for a very long time.
|
||||
|
||||
return flexnbd;
|
||||
}
|
||||
|
||||
|
||||
void flexnbd_spawn_control(struct flexnbd * flexnbd )
|
||||
{
|
||||
NULLCHECK( flexnbd );
|
||||
@@ -158,8 +167,10 @@ void flexnbd_stop_control( struct flexnbd * flexnbd )
|
||||
NULLCHECK( flexnbd->control );
|
||||
|
||||
control_signal_close( flexnbd->control );
|
||||
FATAL_UNLESS( 0 == pthread_join( flexnbd->control->thread, NULL ),
|
||||
pthread_t tid = flexnbd->control->thread;
|
||||
FATAL_UNLESS( 0 == pthread_join( tid, NULL ),
|
||||
"Failed joining the control thread" );
|
||||
debug( "Control thread %p pthread_join returned", tid );
|
||||
}
|
||||
|
||||
|
||||
@@ -175,53 +186,23 @@ void flexnbd_destroy( struct flexnbd * flexnbd )
|
||||
if ( flexnbd->control ) {
|
||||
control_destroy( flexnbd->control );
|
||||
}
|
||||
if ( flexnbd->listen ) {
|
||||
listen_destroy( flexnbd->listen );
|
||||
}
|
||||
|
||||
flexthread_mutex_destroy( flexnbd->switch_mutex );
|
||||
|
||||
close( flexnbd->signal_fd );
|
||||
free( flexnbd );
|
||||
}
|
||||
|
||||
|
||||
/* THOU SHALT NOT DEREFERENCE flexnbd->serve OUTSIDE A SWITCH LOCK
|
||||
*/
|
||||
void flexnbd_lock_switch( struct flexnbd * flexnbd )
|
||||
{
|
||||
NULLCHECK( flexnbd );
|
||||
flexthread_mutex_lock( flexnbd->switch_mutex );
|
||||
}
|
||||
|
||||
void flexnbd_unlock_switch( struct flexnbd * flexnbd )
|
||||
{
|
||||
NULLCHECK( flexnbd );
|
||||
flexthread_mutex_unlock( flexnbd->switch_mutex );
|
||||
}
|
||||
|
||||
int flexnbd_switch_locked( struct flexnbd * flexnbd )
|
||||
{
|
||||
NULLCHECK( flexnbd );
|
||||
return flexthread_mutex_held( flexnbd->switch_mutex );
|
||||
}
|
||||
|
||||
struct server * flexnbd_server( struct flexnbd * flexnbd )
|
||||
{
|
||||
NULLCHECK( flexnbd );
|
||||
return flexnbd->serve;
|
||||
}
|
||||
|
||||
|
||||
/* Hand a new ACL to the current server while holding the switch lock. */
void flexnbd_replace_acl( struct flexnbd * flexnbd, struct acl * acl )
{
	NULLCHECK( flexnbd );

	flexnbd_lock_switch( flexnbd );
	{
		struct server * serve = flexnbd_server( flexnbd );
		server_replace_acl( serve, acl );
	}
	flexnbd_unlock_switch( flexnbd );
}
|
||||
|
||||
|
||||
struct status * flexnbd_status_create( struct flexnbd * flexnbd )
|
||||
@@ -229,16 +210,10 @@ struct status * flexnbd_status_create( struct flexnbd * flexnbd )
|
||||
NULLCHECK( flexnbd );
|
||||
struct status * status;
|
||||
|
||||
flexnbd_lock_switch( flexnbd );
|
||||
{
|
||||
status = status_create( flexnbd_server( flexnbd ) );
|
||||
}
|
||||
flexnbd_unlock_switch( flexnbd );
|
||||
return status;
|
||||
}
|
||||
|
||||
/** THOU SHALT *ONLY* CALL THIS FROM INSIDE A SWITCH LOCK
|
||||
*/
|
||||
void flexnbd_set_server( struct flexnbd * flexnbd, struct server * serve )
|
||||
{
|
||||
NULLCHECK( flexnbd );
|
||||
@@ -246,40 +221,21 @@ void flexnbd_set_server( struct flexnbd * flexnbd, struct server * serve )
|
||||
}
|
||||
|
||||
|
||||
/* Calls the given callback to exchange server objects, then sets
|
||||
* flexnbd->server so everything else can see it. */
|
||||
void flexnbd_switch( struct flexnbd * flexnbd, struct server *(listen_cb)(struct listen *) )
|
||||
{
|
||||
NULLCHECK( flexnbd );
|
||||
NULLCHECK( flexnbd->listen );
|
||||
|
||||
flexnbd_lock_switch( flexnbd );
|
||||
{
|
||||
struct server * new_server = listen_cb( flexnbd->listen );
|
||||
NULLCHECK( new_server );
|
||||
flexnbd_set_server( flexnbd, new_server );
|
||||
}
|
||||
flexnbd_unlock_switch( flexnbd );
|
||||
|
||||
}
|
||||
|
||||
/* Get the default_deny of the current server object. This takes the
|
||||
* switch_lock to avoid nastiness if the server switches and gets freed
|
||||
* in the dereference chain.
|
||||
* This means that this function must not be called if the switch lock
|
||||
* is already held.
|
||||
*/
|
||||
/* Get the default_deny of the current server object. */
|
||||
int flexnbd_default_deny( struct flexnbd * flexnbd )
|
||||
{
|
||||
int result;
|
||||
|
||||
NULLCHECK( flexnbd );
|
||||
flexnbd_lock_switch( flexnbd );
|
||||
{
|
||||
result = server_default_deny( flexnbd->serve );
|
||||
return server_default_deny( flexnbd->serve );
|
||||
}
|
||||
flexnbd_unlock_switch( flexnbd );
|
||||
return result;
|
||||
|
||||
|
||||
/* chmod() the named file to exactly S_IWUSR; failure is fatal and the
 * message includes the filename and strerror( errno ).
 * NOTE(review): this replaces the whole mode rather than adding the
 * owner-write bit - confirm that is what callers expect.
 */
void make_writable( const char * filename )
{
	NULLCHECK( filename );

	const mode_t owner_write_only = S_IWUSR;

	FATAL_IF_NEGATIVE(
		chmod( filename, owner_write_only ),
		"Couldn't chmod %s: %s",
		filename,
		strerror( errno )
	);
}
|
||||
|
||||
|
||||
@@ -287,22 +243,16 @@ int flexnbd_serve( struct flexnbd * flexnbd )
|
||||
{
|
||||
NULLCHECK( flexnbd );
|
||||
int success;
|
||||
struct self_pipe * open_signal = NULL;
|
||||
|
||||
if ( flexnbd->control ){
|
||||
debug( "Spawning control thread" );
|
||||
flexnbd_spawn_control( flexnbd );
|
||||
open_signal = flexnbd->control->open_signal;
|
||||
}
|
||||
|
||||
if ( flexnbd->listen ){
|
||||
success = do_listen( flexnbd->listen );
|
||||
}
|
||||
else {
|
||||
do_serve( flexnbd->serve );
|
||||
/* We can't tell here what the intent was. We can
|
||||
* legitimately exit either in control or not.
|
||||
*/
|
||||
success = 1;
|
||||
}
|
||||
success = do_serve( flexnbd->serve, open_signal );
|
||||
debug("do_serve success is %d", success );
|
||||
|
||||
if ( flexnbd->control ) {
|
||||
debug( "Stopping control thread" );
|
@@ -4,7 +4,8 @@
|
||||
#include "acl.h"
|
||||
#include "mirror.h"
|
||||
#include "serve.h"
|
||||
#include "listen.h"
|
||||
#include "proxy.h"
|
||||
#include "client.h"
|
||||
#include "self_pipe.h"
|
||||
#include "mbox.h"
|
||||
#include "control.h"
|
||||
@@ -12,29 +13,21 @@
|
||||
|
||||
/* Carries the "globals". */
|
||||
struct flexnbd {
|
||||
|
||||
/* We always have a serve pointer, but it should never be
|
||||
* dereferenced outside a flexnbd_switch_lock/unlock pair.
|
||||
/* Our serve pointer should never be dereferenced outside a
|
||||
* flexnbd_switch_lock/unlock pair.
|
||||
*/
|
||||
struct server * serve;
|
||||
/* We only have a listen object if the process was started in
|
||||
* listen mode.
|
||||
*/
|
||||
struct listen * listen;
|
||||
|
||||
/* We only have a control object if a control socket name was
|
||||
* passed on the command line.
|
||||
*/
|
||||
struct control * control;
|
||||
|
||||
/* switch_mutex is the lock around dereferencing the serve
|
||||
* pointer.
|
||||
*/
|
||||
struct flexthread_mutex * switch_mutex;
|
||||
|
||||
/* File descriptor for a signalfd(2) signal stream. */
|
||||
int signal_fd;
|
||||
};
|
||||
|
||||
|
||||
struct flexnbd * flexnbd_create(void);
|
||||
struct flexnbd * flexnbd_create_serving(
|
||||
char* s_ip_address,
|
||||
@@ -44,34 +37,30 @@ struct flexnbd * flexnbd_create_serving(
|
||||
int default_deny,
|
||||
int acl_entries,
|
||||
char** s_acl_entries,
|
||||
int max_nbd_clients);
|
||||
int max_nbd_clients,
|
||||
int use_killswitch);
|
||||
|
||||
struct flexnbd * flexnbd_create_listening(
|
||||
char* s_ip_address,
|
||||
char* s_rebind_ip_address,
|
||||
char* s_port,
|
||||
char* s_rebind_port,
|
||||
char* s_file,
|
||||
char* s_ctrl_sock,
|
||||
int default_deny,
|
||||
int acl_entries,
|
||||
char** s_acl_entries,
|
||||
int max_nbd_clients );
|
||||
char** s_acl_entries );
|
||||
|
||||
void flexnbd_destroy( struct flexnbd * );
|
||||
enum mirror_state;
|
||||
enum mirror_state flexnbd_get_mirror_state( struct flexnbd * );
|
||||
void flexnbd_lock_switch( struct flexnbd * );
|
||||
void flexnbd_unlock_switch( struct flexnbd * );
|
||||
int flexnbd_switch_locked( struct flexnbd * );
|
||||
int flexnbd_default_deny( struct flexnbd * );
|
||||
void flexnbd_set_server( struct flexnbd * flexnbd, struct server * serve );
|
||||
void flexnbd_switch( struct flexnbd * flexnbd, struct server *(listen_cb)(struct listen *) );
|
||||
int flexnbd_signal_fd( struct flexnbd * flexnbd );
|
||||
|
||||
|
||||
int flexnbd_serve( struct flexnbd * flexnbd );
|
||||
int flexnbd_proxy( struct flexnbd * flexnbd );
|
||||
struct server * flexnbd_server( struct flexnbd * flexnbd );
|
||||
void flexnbd_replace_acl( struct flexnbd * flexnbd, struct acl * acl );
|
||||
struct status * flexnbd_status_create( struct flexnbd * flexnbd );
|
||||
#endif
|
||||
|
1071
src/server/mirror.c
Normal file
1071
src/server/mirror.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -18,6 +18,18 @@ enum mirror_state;
|
||||
*/
|
||||
#define MS_CONNECT_TIME_SECS 60
|
||||
|
||||
/* MS_CONVERGE_TIME_SECS
|
||||
* The length of time a migration must be estimated to have remaining for us to
|
||||
* disconnect clients for convergence
|
||||
*
|
||||
* TODO: Make this configurable so refusing-to-converge clients can be manually
|
||||
* fixed.
|
||||
* TODO: Make this adaptive - 5 seconds is fine, as long as we can guarantee
|
||||
* that all migrations will be able to converge in time. We'd add a new
|
||||
* state between open and closed, where gradually-increasing latency is
|
||||
* added to client requests to allow the mirror to be faster.
|
||||
*/
|
||||
#define MS_CONVERGE_TIME_SECS 5
|
||||
|
||||
/* MS_HELLO_TIME_SECS
|
||||
* The length of time the sender will wait for the NBD hello message
|
||||
@@ -38,12 +50,16 @@ enum mirror_state;
|
||||
* request, this is the time between the end of the NBD request and the
|
||||
* start of the NBD reply. For a write request, this is the time
|
||||
* between the end of the written data and the start of the NBD reply.
|
||||
* Can be overridden by the environment variable:
|
||||
* FLEXNBD_MS_REQUEST_LIMIT_SECS
|
||||
*/
|
||||
#define MS_REQUEST_LIMIT_SECS 4
|
||||
|
||||
#define MS_REQUEST_LIMIT_SECS 60
|
||||
#define MS_REQUEST_LIMIT_SECS_F 60.0
|
||||
|
||||
/* What to do once a mirror run has finished.  The control socket's
 * "mirror" command maps the words "exit", "unlink" and "nothing" onto
 * these values.
 */
enum mirror_finish_action {
	ACTION_EXIT,      /* selected by "exit" */
	ACTION_UNLINK,    /* selected by "unlink" */
	ACTION_NOTHING,   /* selected by "nothing" */
};
|
||||
|
||||
@@ -51,6 +67,7 @@ enum mirror_state {
|
||||
MS_UNKNOWN,
|
||||
MS_INIT,
|
||||
MS_GO,
|
||||
MS_ABANDONED,
|
||||
MS_DONE,
|
||||
MS_FAIL_CONNECT,
|
||||
MS_FAIL_REJECTED,
|
||||
@@ -60,17 +77,25 @@ enum mirror_state {
|
||||
|
||||
struct mirror {
|
||||
pthread_t thread;
|
||||
/* set to 1, then join thread to make mirror terminate early */
|
||||
int signal_abandon;
|
||||
|
||||
/* Signal to this then join the thread if you want to abandon mirroring */
|
||||
struct self_pipe * abandon_signal;
|
||||
|
||||
union mysockaddr * connect_to;
|
||||
union mysockaddr * connect_from;
|
||||
int client;
|
||||
const char * filename;
|
||||
off64_t max_bytes_per_second;
|
||||
|
||||
/* Limiter, used to restrict migration speed Only dirty bytes (those going
|
||||
* over the network) are considered */
|
||||
uint64_t max_bytes_per_second;
|
||||
|
||||
enum mirror_finish_action action_at_finish;
|
||||
|
||||
char *mapped;
|
||||
struct bitset_mapping *dirty_map;
|
||||
|
||||
/* We need to send every byte at least once; we do so by */
|
||||
uint64_t offset;
|
||||
|
||||
enum mirror_state commit_state;
|
||||
|
||||
@@ -78,6 +103,13 @@ struct mirror {
|
||||
* and checking the remote size, whether successful or not.
|
||||
*/
|
||||
struct mbox * commit_signal;
|
||||
|
||||
/* The time (from monotonic_time_ms()) the migration was started. Can be
|
||||
* used to calculate bps, etc. */
|
||||
uint64_t migration_started;
|
||||
|
||||
/* Running count of all bytes we've transferred */
|
||||
uint64_t all_dirty;
|
||||
};
|
||||
|
||||
|
||||
@@ -99,9 +131,11 @@ struct mirror_super * mirror_super_create(
|
||||
const char * filename,
|
||||
union mysockaddr * connect_to,
|
||||
union mysockaddr * connect_from,
|
||||
int max_Bps,
|
||||
int action_at_finish,
|
||||
uint64_t max_Bps,
|
||||
enum mirror_finish_action action_at_finish,
|
||||
struct mbox * state_mbox
|
||||
);
|
||||
void * mirror_super_runner( void * serve_uncast );
|
||||
|
||||
#endif
|
||||
|
@@ -15,10 +15,11 @@ static struct option serve_options[] = {
|
||||
GETOPT_SOCK,
|
||||
GETOPT_DENY,
|
||||
GETOPT_QUIET,
|
||||
GETOPT_KILLSWITCH,
|
||||
GETOPT_VERBOSE,
|
||||
{0}
|
||||
};
|
||||
static char serve_short_options[] = "hl:p:f:s:d" SOPT_QUIET SOPT_VERBOSE;
|
||||
static char serve_short_options[] = "hl:p:f:s:dk" SOPT_QUIET SOPT_VERBOSE;
|
||||
static char serve_help_text[] =
|
||||
"Usage: flexnbd " CMD_SERVE " <options> [<acl address>*]\n\n"
|
||||
"Serve FILE from ADDR:PORT, with an optional control socket at SOCK.\n\n"
|
||||
@@ -27,6 +28,7 @@ static char serve_help_text[] =
|
||||
"\t--" OPT_PORT ",-p <PORT>\tThe port to serve on.\n"
|
||||
"\t--" OPT_FILE ",-f <FILE>\tThe file to serve.\n"
|
||||
"\t--" OPT_DENY ",-d\tDeny connections by default unless in ACL.\n"
|
||||
"\t--" OPT_KILLSWITCH",-k \tKill the server if a request takes 120 seconds.\n"
|
||||
SOCK_LINE
|
||||
VERBOSE_LINE
|
||||
QUIET_LINE;
|
||||
@@ -35,9 +37,7 @@ static char serve_help_text[] =
|
||||
static struct option listen_options[] = {
|
||||
GETOPT_HELP,
|
||||
GETOPT_ADDR,
|
||||
GETOPT_REBIND_ADDR,
|
||||
GETOPT_PORT,
|
||||
GETOPT_REBIND_PORT,
|
||||
GETOPT_FILE,
|
||||
GETOPT_SOCK,
|
||||
GETOPT_DENY,
|
||||
@@ -45,24 +45,19 @@ static struct option listen_options[] = {
|
||||
GETOPT_VERBOSE,
|
||||
{0}
|
||||
};
|
||||
static char listen_short_options[] = "hl:L:p:P:f:s:d" SOPT_QUIET SOPT_VERBOSE;
|
||||
static char listen_short_options[] = "hl:p:f:s:d" SOPT_QUIET SOPT_VERBOSE;
|
||||
static char listen_help_text[] =
|
||||
"Usage: flexnbd " CMD_LISTEN " <options> [<acl_address>*]\n\n"
|
||||
"Listen for an incoming migration on ADDR:PORT, "
|
||||
"then switch to REBIND_ADDR:REBIND_PORT on completion "
|
||||
"to serve FILE.\n\n"
|
||||
"Listen for an incoming migration on ADDR:PORT."
|
||||
HELP_LINE
|
||||
"\t--" OPT_ADDR ",-l <ADDR>\tThe address to listen on.\n"
|
||||
"\t--" OPT_REBIND_ADDR ",-L <REBIND_ADDR>\tThe address to switch to, if given.\n"
|
||||
"\t--" OPT_PORT ",-p <PORT>\tThe port to listen on.\n"
|
||||
"\t--" OPT_REBIND_PORT ",-P <REBIND_PORT>\tThe port to switch to, if given..\n"
|
||||
"\t--" OPT_FILE ",-f <FILE>\tThe file to serve.\n"
|
||||
"\t--" OPT_DENY ",-d\tDeny connections by default unless in ACL.\n"
|
||||
SOCK_LINE
|
||||
VERBOSE_LINE
|
||||
QUIET_LINE;
|
||||
|
||||
|
||||
static struct option read_options[] = {
|
||||
GETOPT_HELP,
|
||||
GETOPT_ADDR,
|
||||
@@ -118,17 +113,36 @@ static char acl_help_text[] =
|
||||
VERBOSE_LINE
|
||||
QUIET_LINE;
|
||||
|
||||
static struct option mirror_speed_options[] = {
|
||||
GETOPT_HELP,
|
||||
GETOPT_SOCK,
|
||||
GETOPT_MAX_SPEED,
|
||||
GETOPT_QUIET,
|
||||
GETOPT_VERBOSE,
|
||||
{0}
|
||||
};
|
||||
static char mirror_speed_short_options[] = "hs:m:" SOPT_QUIET SOPT_VERBOSE;
|
||||
static char mirror_speed_help_text[] =
|
||||
"Usage: flexnbd " CMD_MIRROR_SPEED " <options>\n\n"
|
||||
"Set the maximum speed of a migration from a mirring server listening on SOCK.\n\n"
|
||||
HELP_LINE
|
||||
SOCK_LINE
|
||||
MAX_SPEED_LINE
|
||||
VERBOSE_LINE
|
||||
QUIET_LINE;
|
||||
|
||||
static struct option mirror_options[] = {
|
||||
GETOPT_HELP,
|
||||
GETOPT_SOCK,
|
||||
GETOPT_ADDR,
|
||||
GETOPT_PORT,
|
||||
GETOPT_UNLINK,
|
||||
GETOPT_BIND,
|
||||
GETOPT_QUIET,
|
||||
GETOPT_VERBOSE,
|
||||
{0}
|
||||
};
|
||||
static char mirror_short_options[] = "hs:l:p:b:" SOPT_QUIET SOPT_VERBOSE;
|
||||
static char mirror_short_options[] = "hs:l:p:ub:" SOPT_QUIET SOPT_VERBOSE;
|
||||
static char mirror_help_text[] =
|
||||
"Usage: flexnbd " CMD_MIRROR " <options>\n\n"
|
||||
"Start mirroring from the server with control socket SOCK to one at ADDR:PORT.\n\n"
|
||||
@@ -136,10 +150,27 @@ static char mirror_help_text[] =
|
||||
"\t--" OPT_ADDR ",-l <ADDR>\tThe address to mirror to.\n"
|
||||
"\t--" OPT_PORT ",-p <PORT>\tThe port to mirror to.\n"
|
||||
SOCK_LINE
|
||||
"\t--" OPT_UNLINK ",-u\tUnlink the local file when done.\n"
|
||||
BIND_LINE
|
||||
VERBOSE_LINE
|
||||
QUIET_LINE;
|
||||
|
||||
static struct option break_options[] = {
|
||||
GETOPT_HELP,
|
||||
GETOPT_SOCK,
|
||||
GETOPT_QUIET,
|
||||
GETOPT_VERBOSE,
|
||||
{0}
|
||||
};
|
||||
static char break_short_options[] = "hs:" SOPT_QUIET SOPT_VERBOSE;
|
||||
static char break_help_text[] =
|
||||
"Usage: flexnbd " CMD_BREAK " <options>\n\n"
|
||||
"Stop mirroring from the server with control socket SOCK.\n\n"
|
||||
HELP_LINE
|
||||
SOCK_LINE
|
||||
VERBOSE_LINE
|
||||
QUIET_LINE;
|
||||
|
||||
|
||||
static struct option status_options[] = {
|
||||
GETOPT_HELP,
|
||||
@@ -161,10 +192,13 @@ char help_help_text_arr[] =
|
||||
"Usage: flexnbd <cmd> [cmd options]\n\n"
|
||||
"Commands:\n"
|
||||
"\tflexnbd serve\n"
|
||||
"\tflexnbd listen\n"
|
||||
"\tflexnbd read\n"
|
||||
"\tflexnbd write\n"
|
||||
"\tflexnbd acl\n"
|
||||
"\tflexnbd mirror\n"
|
||||
"\tflexnbd mirror-speed\n"
|
||||
"\tflexnbd break\n"
|
||||
"\tflexnbd status\n"
|
||||
"\tflexnbd help\n\n"
|
||||
"See flexnbd help <cmd> for further info\n";
|
||||
@@ -175,19 +209,17 @@ char * help_help_text = help_help_text_arr;
|
||||
|
||||
|
||||
|
||||
int do_serve(struct server* params);
|
||||
void do_read(struct mode_readwrite_params* params);
|
||||
void do_write(struct mode_readwrite_params* params);
|
||||
void do_remote_command(char* command, char* mode, int argc, char** argv);
|
||||
|
||||
|
||||
void read_serve_param( int c, char **ip_addr, char **ip_port, char **file, char **sock, int *default_deny )
|
||||
void read_serve_param( int c, char **ip_addr, char **ip_port, char **file, char **sock, int *default_deny, int *use_killswitch )
|
||||
{
|
||||
switch(c){
|
||||
case 'h':
|
||||
fprintf(stdout, "%s\n", serve_help_text );
|
||||
exit( 0 );
|
||||
break;
|
||||
case 'l':
|
||||
*ip_addr = optarg;
|
||||
break;
|
||||
@@ -204,11 +236,14 @@ void read_serve_param( int c, char **ip_addr, char **ip_port, char **file, char
|
||||
*default_deny = 1;
|
||||
break;
|
||||
case 'q':
|
||||
log_level = 4;
|
||||
log_level = QUIET_LOG_LEVEL;
|
||||
break;
|
||||
case 'v':
|
||||
log_level = VERBOSE_LOG_LEVEL;
|
||||
break;
|
||||
case 'k':
|
||||
*use_killswitch = 1;
|
||||
break;
|
||||
default:
|
||||
exit_err( serve_help_text );
|
||||
break;
|
||||
@@ -218,9 +253,7 @@ void read_serve_param( int c, char **ip_addr, char **ip_port, char **file, char
|
||||
|
||||
void read_listen_param( int c,
|
||||
char **ip_addr,
|
||||
char **rebind_ip_addr,
|
||||
char **ip_port,
|
||||
char **rebind_ip_port,
|
||||
char **file,
|
||||
char **sock,
|
||||
int *default_deny )
|
||||
@@ -229,19 +262,12 @@ void read_listen_param( int c,
|
||||
case 'h':
|
||||
fprintf(stdout, "%s\n", listen_help_text );
|
||||
exit(0);
|
||||
break;
|
||||
case 'l':
|
||||
*ip_addr = optarg;
|
||||
break;
|
||||
case 'L':
|
||||
*rebind_ip_addr = optarg;
|
||||
break;
|
||||
case 'p':
|
||||
*ip_port = optarg;
|
||||
break;
|
||||
case 'P':
|
||||
*rebind_ip_port = optarg;
|
||||
break;
|
||||
case 'f':
|
||||
*file = optarg;
|
||||
break;
|
||||
@@ -252,7 +278,7 @@ void read_listen_param( int c,
|
||||
*default_deny = 1;
|
||||
break;
|
||||
case 'q':
|
||||
log_level = 4;
|
||||
log_level = QUIET_LOG_LEVEL;
|
||||
break;
|
||||
case 'v':
|
||||
log_level = VERBOSE_LOG_LEVEL;
|
||||
@@ -263,13 +289,12 @@ void read_listen_param( int c,
|
||||
}
|
||||
}
|
||||
|
||||
void read_readwrite_param( int c, char **ip_addr, char **ip_port, char **bind_addr, char **from, char **size)
|
||||
void read_readwrite_param( int c, char **ip_addr, char **ip_port, char **bind_addr, char **from, char **size, char *err_text )
|
||||
{
|
||||
switch(c){
|
||||
case 'h':
|
||||
fprintf(stdout, "%s\n", read_help_text );
|
||||
fprintf(stdout, "%s\n", err_text );
|
||||
exit( 0 );
|
||||
break;
|
||||
case 'l':
|
||||
*ip_addr = optarg;
|
||||
break;
|
||||
@@ -286,13 +311,13 @@ void read_readwrite_param( int c, char **ip_addr, char **ip_port, char **bind_ad
|
||||
*bind_addr = optarg;
|
||||
break;
|
||||
case 'q':
|
||||
log_level = 4;
|
||||
log_level = QUIET_LOG_LEVEL;
|
||||
break;
|
||||
case 'v':
|
||||
log_level = VERBOSE_LOG_LEVEL;
|
||||
break;
|
||||
default:
|
||||
exit_err( read_help_text );
|
||||
exit_err( err_text );
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -303,12 +328,11 @@ void read_sock_param( int c, char **sock, char *help_text )
|
||||
case 'h':
|
||||
fprintf( stdout, "%s\n", help_text );
|
||||
exit( 0 );
|
||||
break;
|
||||
case 's':
|
||||
*sock = optarg;
|
||||
break;
|
||||
case 'q':
|
||||
log_level = 4;
|
||||
log_level = QUIET_LOG_LEVEL;
|
||||
break;
|
||||
case 'v':
|
||||
log_level = VERBOSE_LOG_LEVEL;
|
||||
@@ -324,13 +348,47 @@ void read_acl_param( int c, char **sock )
|
||||
read_sock_param( c, sock, acl_help_text );
|
||||
}
|
||||
|
||||
void read_mirror_param( int c, char **sock, char **ip_addr, char **ip_port, char **bind_addr )
|
||||
void read_mirror_speed_param(
|
||||
int c,
|
||||
char **sock,
|
||||
char **max_speed
|
||||
)
|
||||
{
|
||||
switch( c ) {
|
||||
case 'h':
|
||||
fprintf( stdout, "%s\n", mirror_speed_help_text );
|
||||
exit( 0 );
|
||||
case 's':
|
||||
*sock = optarg;
|
||||
break;
|
||||
case 'm':
|
||||
*max_speed = optarg;
|
||||
break;
|
||||
case 'q':
|
||||
log_level = QUIET_LOG_LEVEL;
|
||||
break;
|
||||
case 'v':
|
||||
log_level = VERBOSE_LOG_LEVEL;
|
||||
break;
|
||||
default:
|
||||
exit_err( mirror_speed_help_text );
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void read_mirror_param(
|
||||
int c,
|
||||
char **sock,
|
||||
char **ip_addr,
|
||||
char **ip_port,
|
||||
int *unlink,
|
||||
char **bind_addr )
|
||||
{
|
||||
switch( c ){
|
||||
case 'h':
|
||||
fprintf( stdout, "%s\n", mirror_help_text );
|
||||
exit( 0 );
|
||||
break;
|
||||
case 's':
|
||||
*sock = optarg;
|
||||
break;
|
||||
@@ -340,11 +398,14 @@ void read_mirror_param( int c, char **sock, char **ip_addr, char **ip_port, char
|
||||
case 'p':
|
||||
*ip_port = optarg;
|
||||
break;
|
||||
case 'u':
|
||||
*unlink = 1;
|
||||
break;
|
||||
case 'b':
|
||||
*bind_addr = optarg;
|
||||
break;
|
||||
case 'q':
|
||||
log_level = 4;
|
||||
log_level = QUIET_LOG_LEVEL;
|
||||
break;
|
||||
case 'v':
|
||||
log_level = VERBOSE_LOG_LEVEL;
|
||||
@@ -355,6 +416,28 @@ void read_mirror_param( int c, char **sock, char **ip_addr, char **ip_port, char
|
||||
}
|
||||
}
|
||||
|
||||
void read_break_param( int c, char **sock )
|
||||
{
|
||||
switch( c ) {
|
||||
case 'h':
|
||||
fprintf( stdout, "%s\n", break_help_text );
|
||||
exit( 0 );
|
||||
case 's':
|
||||
*sock = optarg;
|
||||
break;
|
||||
case 'q':
|
||||
log_level = QUIET_LOG_LEVEL;
|
||||
break;
|
||||
case 'v':
|
||||
log_level = VERBOSE_LOG_LEVEL;
|
||||
break;
|
||||
default:
|
||||
exit_err( break_help_text );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void read_status_param( int c, char **sock )
|
||||
{
|
||||
read_sock_param( c, sock, status_help_text );
|
||||
@@ -368,15 +451,18 @@ int mode_serve( int argc, char *argv[] )
|
||||
char *file = NULL;
|
||||
char *sock = NULL;
|
||||
int default_deny = 0; // not on by default
|
||||
int use_killswitch = 0;
|
||||
int err = 0;
|
||||
|
||||
int success;
|
||||
|
||||
struct flexnbd * flexnbd;
|
||||
|
||||
while (1) {
|
||||
c = getopt_long(argc, argv, serve_short_options, serve_options, NULL);
|
||||
if ( c == -1 ) { break; }
|
||||
|
||||
read_serve_param( c, &ip_addr, &ip_port, &file, &sock, &default_deny );
|
||||
read_serve_param( c, &ip_addr, &ip_port, &file, &sock, &default_deny, &use_killswitch );
|
||||
}
|
||||
|
||||
if ( NULL == ip_addr || NULL == ip_port ) {
|
||||
@@ -389,11 +475,12 @@ int mode_serve( int argc, char *argv[] )
|
||||
}
|
||||
if ( err ) { exit_err( serve_help_text ); }
|
||||
|
||||
flexnbd = flexnbd_create_serving( ip_addr, ip_port, file, sock, default_deny, argc - optind, argv + optind, MAX_NBD_CLIENTS );
|
||||
flexnbd_serve( flexnbd );
|
||||
flexnbd = flexnbd_create_serving( ip_addr, ip_port, file, sock, default_deny, argc - optind, argv + optind, MAX_NBD_CLIENTS, use_killswitch );
|
||||
info( "Serving file %s", file );
|
||||
success = flexnbd_serve( flexnbd );
|
||||
flexnbd_destroy( flexnbd );
|
||||
|
||||
return 0;
|
||||
return success ? 0 : 1;
|
||||
}
|
||||
|
||||
|
||||
@@ -401,9 +488,7 @@ int mode_listen( int argc, char *argv[] )
|
||||
{
|
||||
int c;
|
||||
char *ip_addr = NULL;
|
||||
char *rebind_ip_addr = NULL;
|
||||
char *ip_port = NULL;
|
||||
char *rebind_ip_port = NULL;
|
||||
char *file = NULL;
|
||||
char *sock = NULL;
|
||||
int default_deny = 0; // not on by default
|
||||
@@ -417,7 +502,7 @@ int mode_listen( int argc, char *argv[] )
|
||||
c = getopt_long(argc, argv, listen_short_options, listen_options, NULL);
|
||||
if ( c == -1 ) { break; }
|
||||
|
||||
read_listen_param( c, &ip_addr, &rebind_ip_addr, &ip_port, &rebind_ip_port,
|
||||
read_listen_param( c, &ip_addr, &ip_port,
|
||||
&file, &sock, &default_deny );
|
||||
}
|
||||
|
||||
@@ -433,15 +518,12 @@ int mode_listen( int argc, char *argv[] )
|
||||
|
||||
flexnbd = flexnbd_create_listening(
|
||||
ip_addr,
|
||||
rebind_ip_addr,
|
||||
ip_port,
|
||||
rebind_ip_port,
|
||||
file,
|
||||
sock,
|
||||
default_deny,
|
||||
argc - optind,
|
||||
argv + optind,
|
||||
MAX_NBD_CLIENTS );
|
||||
argv + optind);
|
||||
success = flexnbd_serve( flexnbd );
|
||||
flexnbd_destroy( flexnbd );
|
||||
|
||||
@@ -491,7 +573,10 @@ void params_readwrite(
|
||||
|
||||
parse_port( s_port, &out->connect_to.v4 );
|
||||
|
||||
out->from = atol(s_from);
|
||||
long signed_from = atol(s_from);
|
||||
FATAL_IF_NEGATIVE( signed_from,
|
||||
"Can't read from a negative offset %d.", signed_from);
|
||||
out->from = signed_from;
|
||||
|
||||
if (write_not_read) {
|
||||
if (s_length_or_filename[0]-48 < 10) {
|
||||
@@ -503,9 +588,10 @@ void params_readwrite(
|
||||
s_length_or_filename, O_RDONLY);
|
||||
FATAL_IF_NEGATIVE(out->data_fd,
|
||||
"Couldn't open %s", s_length_or_filename);
|
||||
out->len = lseek64(out->data_fd, 0, SEEK_END);
|
||||
FATAL_IF_NEGATIVE(out->len,
|
||||
off64_t signed_len = lseek64(out->data_fd, 0, SEEK_END);
|
||||
FATAL_IF_NEGATIVE(signed_len,
|
||||
"Couldn't find length of %s", s_length_or_filename);
|
||||
out->len = signed_len;
|
||||
FATAL_IF_NEGATIVE(
|
||||
lseek64(out->data_fd, 0, SEEK_SET),
|
||||
"Couldn't rewind %s", s_length_or_filename
|
||||
@@ -536,7 +622,7 @@ int mode_read( int argc, char *argv[] )
|
||||
|
||||
if ( c == -1 ) { break; }
|
||||
|
||||
read_readwrite_param( c, &ip_addr, &ip_port, &bind_addr, &from, &size );
|
||||
read_readwrite_param( c, &ip_addr, &ip_port, &bind_addr, &from, &size, read_help_text );
|
||||
}
|
||||
|
||||
if ( NULL == ip_addr || NULL == ip_port ) {
|
||||
@@ -571,7 +657,7 @@ int mode_write( int argc, char *argv[] )
|
||||
c = getopt_long(argc, argv, write_short_options, write_options, NULL);
|
||||
if ( c == -1 ) { break; }
|
||||
|
||||
read_readwrite_param( c, &ip_addr, &ip_port, &bind_addr, &from, &size );
|
||||
read_readwrite_param( c, &ip_addr, &ip_port, &bind_addr, &from, &size, write_help_text );
|
||||
}
|
||||
|
||||
if ( NULL == ip_addr || NULL == ip_port ) {
|
||||
@@ -615,17 +701,52 @@ int mode_acl( int argc, char *argv[] )
|
||||
}
|
||||
|
||||
|
||||
int mode_mirror_speed( int argc, char *argv[] )
|
||||
{
|
||||
int c;
|
||||
char *sock = NULL;
|
||||
char *speed = NULL;
|
||||
|
||||
while( 1 ) {
|
||||
c = getopt_long( argc, argv, mirror_speed_short_options, mirror_speed_options, NULL );
|
||||
if ( -1 == c ) { break; }
|
||||
read_mirror_speed_param( c, &sock, &speed );
|
||||
}
|
||||
|
||||
if ( NULL == sock ) {
|
||||
fprintf( stderr, "--sock is required.\n" );
|
||||
exit_err( mirror_speed_help_text );
|
||||
}
|
||||
|
||||
if ( NULL == speed ) {
|
||||
fprintf( stderr, "--max-speed is required.\n");
|
||||
exit_err( mirror_speed_help_text );
|
||||
}
|
||||
|
||||
do_remote_command( "mirror_max_bps", sock, 1, &speed );
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int mode_mirror( int argc, char *argv[] )
|
||||
{
|
||||
int c;
|
||||
char *sock = NULL;
|
||||
char *remote_argv[4] = {0};
|
||||
int err = 0;
|
||||
int unlink = 0;
|
||||
|
||||
remote_argv[2] = "exit";
|
||||
|
||||
while (1) {
|
||||
c = getopt_long( argc, argv, mirror_short_options, mirror_options, NULL);
|
||||
if ( -1 == c ) { break; }
|
||||
read_mirror_param( c, &sock, &remote_argv[0], &remote_argv[1], &remote_argv[2] );
|
||||
read_mirror_param( c,
|
||||
&sock,
|
||||
&remote_argv[0],
|
||||
&remote_argv[1],
|
||||
&unlink,
|
||||
&remote_argv[3] );
|
||||
}
|
||||
|
||||
if ( NULL == sock ){
|
||||
@@ -637,18 +758,40 @@ int mode_mirror( int argc, char *argv[] )
|
||||
err = 1;
|
||||
}
|
||||
if ( err ) { exit_err( mirror_help_text ); }
|
||||
if ( unlink ) { remote_argv[2] = "unlink"; }
|
||||
|
||||
if (remote_argv[2] == NULL) {
|
||||
do_remote_command( "mirror", sock, 2, remote_argv );
|
||||
if (remote_argv[3] == NULL) {
|
||||
do_remote_command( "mirror", sock, 3, remote_argv );
|
||||
}
|
||||
else {
|
||||
do_remote_command( "mirror", sock, 3, remote_argv );
|
||||
do_remote_command( "mirror", sock, 4, remote_argv );
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int mode_break( int argc, char *argv[] )
|
||||
{
|
||||
int c;
|
||||
char *sock = NULL;
|
||||
|
||||
while (1) {
|
||||
c = getopt_long( argc, argv, break_short_options, break_options, NULL );
|
||||
if ( -1 == c ) { break; }
|
||||
read_break_param( c, &sock );
|
||||
}
|
||||
|
||||
if ( NULL == sock ){
|
||||
fprintf( stderr, "--sock is required.\n" );
|
||||
exit_err( break_help_text );
|
||||
}
|
||||
|
||||
do_remote_command( "break", sock, argc - optind, argv + optind );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mode_status( int argc, char *argv[] )
|
||||
{
|
||||
int c;
|
||||
@@ -662,7 +805,7 @@ int mode_status( int argc, char *argv[] )
|
||||
|
||||
if ( NULL == sock ){
|
||||
fprintf( stderr, "--sock is required.\n" );
|
||||
exit_err( acl_help_text );
|
||||
exit_err( status_help_text );
|
||||
}
|
||||
|
||||
do_remote_command( "status", sock, argc - optind, argv + optind );
|
||||
@@ -670,7 +813,6 @@ int mode_status( int argc, char *argv[] )
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int mode_help( int argc, char *argv[] )
|
||||
{
|
||||
char *cmd;
|
||||
@@ -718,10 +860,15 @@ void mode(char* mode, int argc, char **argv)
|
||||
}
|
||||
else if ( IS_CMD( CMD_ACL, mode ) ) {
|
||||
mode_acl( argc, argv );
|
||||
} else if ( IS_CMD ( CMD_MIRROR_SPEED, mode ) ) {
|
||||
mode_mirror_speed( argc, argv );
|
||||
}
|
||||
else if ( IS_CMD( CMD_MIRROR, mode ) ) {
|
||||
mode_mirror( argc, argv );
|
||||
}
|
||||
else if ( IS_CMD( CMD_BREAK, mode ) ) {
|
||||
mode_break( argc, argv );
|
||||
}
|
||||
else if ( IS_CMD( CMD_STATUS, mode ) ) {
|
||||
mode_status( argc, argv );
|
||||
}
|
||||
@@ -735,4 +882,3 @@ void mode(char* mode, int argc, char **argv)
|
||||
exit(0);
|
||||
}
|
||||
|
||||
|
@@ -2,6 +2,7 @@
|
||||
#include "client.h"
|
||||
#include "nbdtypes.h"
|
||||
#include "ioutil.h"
|
||||
#include "sockutil.h"
|
||||
#include "util.h"
|
||||
#include "bitset.h"
|
||||
#include "control.h"
|
||||
@@ -20,22 +21,6 @@
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/tcp.h>
|
||||
|
||||
/* Return a pointer to the raw address field inside a sockaddr:
 * sin_addr for AF_INET, sin6_addr for AF_INET6, NULL for any other
 * address family.
 */
static inline void* sockaddr_address_data(struct sockaddr* sockaddr)
{
	NULLCHECK( sockaddr );

	switch ( sockaddr->sa_family ) {
	case AF_INET:
		return &( (struct sockaddr_in*) sockaddr )->sin_addr;
	case AF_INET6:
		return &( (struct sockaddr_in6*) sockaddr )->sin6_addr;
	default:
		return NULL;
	}
}
|
||||
|
||||
struct server * server_create (
|
||||
struct flexnbd * flexnbd,
|
||||
char* s_ip_address,
|
||||
@@ -45,16 +30,20 @@ struct server * server_create (
|
||||
int acl_entries,
|
||||
char** s_acl_entries,
|
||||
int max_nbd_clients,
|
||||
int has_control)
|
||||
int use_killswitch,
|
||||
int success)
|
||||
{
|
||||
NULLCHECK( flexnbd );
|
||||
struct server * out;
|
||||
out = xmalloc( sizeof( struct server ) );
|
||||
out->flexnbd = flexnbd;
|
||||
out->has_control = has_control;
|
||||
out->success = success;
|
||||
out->max_nbd_clients = max_nbd_clients;
|
||||
out->nbd_client = xmalloc( max_nbd_clients * sizeof( struct client_tbl_entry ) );
|
||||
out->use_killswitch = use_killswitch;
|
||||
|
||||
server_allow_new_clients( out );
|
||||
|
||||
out->nbd_client = xmalloc( max_nbd_clients * sizeof( struct client_tbl_entry ) );
|
||||
out->tcp_backlog = 10; /* does this need to be settable? */
|
||||
|
||||
FATAL_IF_NULL(s_ip_address, "No IP address supplied");
|
||||
@@ -77,12 +66,11 @@ struct server * server_create (
|
||||
parse_port( s_port, &out->bind_to.v4 );
|
||||
|
||||
out->filename = s_file;
|
||||
out->filename_incomplete = xmalloc(strlen(s_file)+11+1);
|
||||
strcpy(out->filename_incomplete, s_file);
|
||||
strcpy(out->filename_incomplete + strlen(s_file), ".INCOMPLETE");
|
||||
|
||||
out->l_io = flexthread_mutex_create();
|
||||
out->l_acl = flexthread_mutex_create();
|
||||
out->l_start_mirror = flexthread_mutex_create();
|
||||
|
||||
out->mirror_can_start = 1;
|
||||
|
||||
out->close_signal = self_pipe_create();
|
||||
out->acl_updated_signal = self_pipe_create();
|
||||
@@ -100,28 +88,29 @@ void server_destroy( struct server * serve )
|
||||
self_pipe_destroy( serve->close_signal );
|
||||
serve->close_signal = NULL;
|
||||
|
||||
flexthread_mutex_destroy( serve->l_start_mirror );
|
||||
flexthread_mutex_destroy( serve->l_acl );
|
||||
flexthread_mutex_destroy( serve->l_io );
|
||||
|
||||
if ( serve->acl ) {
|
||||
acl_destroy( serve->acl );
|
||||
serve->acl = NULL;
|
||||
}
|
||||
|
||||
free( serve->filename_incomplete );
|
||||
|
||||
free( serve->nbd_client );
|
||||
free( serve );
|
||||
}
|
||||
|
||||
|
||||
void server_dirty(struct server *serve, off64_t from, int len)
|
||||
void server_unlink( struct server * serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
NULLCHECK( serve->filename );
|
||||
|
||||
FATAL_IF_NEGATIVE( unlink( serve->filename ),
|
||||
"Failed to unlink %s: %s",
|
||||
serve->filename,
|
||||
strerror( errno ) );
|
||||
|
||||
if (serve->mirror) {
|
||||
bitset_set_range(serve->mirror->dirty_map, from, len);
|
||||
}
|
||||
}
|
||||
|
||||
#define SERVER_LOCK( s, f, msg ) \
|
||||
@@ -131,30 +120,6 @@ void server_dirty(struct server *serve, off64_t from, int len)
|
||||
do { NULLCHECK( s ); \
|
||||
FATAL_IF( 0 != flexthread_mutex_unlock( s->f ), msg ); } while (0)
|
||||
|
||||
void server_lock_io( struct server * serve)
|
||||
{
|
||||
debug("IO locking");
|
||||
|
||||
SERVER_LOCK( serve, l_io, "Problem with I/O lock" );
|
||||
}
|
||||
|
||||
void server_unlock_io( struct server* serve )
|
||||
{
|
||||
debug("IO unlocking");
|
||||
|
||||
SERVER_UNLOCK( serve, l_io, "Problem with I/O unlock" );
|
||||
}
|
||||
|
||||
|
||||
/* This is only to be called from error handlers. */
|
||||
int server_io_locked( struct server * serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
return flexthread_mutex_held( serve->l_io );
|
||||
}
|
||||
|
||||
|
||||
|
||||
void server_lock_acl( struct server *serve )
|
||||
{
|
||||
debug("ACL locking");
|
||||
@@ -164,6 +129,8 @@ void server_lock_acl( struct server *serve )
|
||||
|
||||
void server_unlock_acl( struct server *serve )
|
||||
{
|
||||
debug( "ACL unlocking" );
|
||||
|
||||
SERVER_UNLOCK( serve, l_acl, "Problem with ACL unlock" );
|
||||
}
|
||||
|
||||
@@ -175,6 +142,26 @@ int server_acl_locked( struct server * serve )
|
||||
}
|
||||
|
||||
|
||||
void server_lock_start_mirror( struct server *serve )
|
||||
{
|
||||
debug("Mirror start locking");
|
||||
|
||||
SERVER_LOCK( serve, l_start_mirror, "Problem with start mirror lock" );
|
||||
}
|
||||
|
||||
void server_unlock_start_mirror( struct server *serve )
|
||||
{
|
||||
debug("Mirror start unlocking");
|
||||
|
||||
SERVER_UNLOCK( serve, l_start_mirror, "Problem with start mirror unlock" );
|
||||
}
|
||||
|
||||
int server_start_mirror_locked( struct server * serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
return flexthread_mutex_held( serve->l_start_mirror );
|
||||
}
|
||||
|
||||
/** Return the actual port the server bound to. This is used because we
|
||||
* are allowed to pass "0" on the command-line.
|
||||
*/
|
||||
@@ -192,74 +179,15 @@ int server_port( struct server * server )
|
||||
}
|
||||
|
||||
|
||||
/* Try to bind to our serving socket, retrying until it works or gives a
|
||||
* fatal error. */
|
||||
void serve_bind( struct server * serve )
|
||||
{
|
||||
int bind_result;
|
||||
|
||||
char s_address[64];
|
||||
memset( s_address, 0, 64 );
|
||||
strcpy( s_address, "???" );
|
||||
inet_ntop( serve->bind_to.generic.sa_family,
|
||||
sockaddr_address_data( &serve->bind_to.generic),
|
||||
s_address, 64 );
|
||||
|
||||
do {
|
||||
bind_result = bind(
|
||||
serve->server_fd,
|
||||
&serve->bind_to.generic,
|
||||
sizeof(serve->bind_to));
|
||||
|
||||
if ( 0 == bind_result ) {
|
||||
info( "Bound to %s port %d",
|
||||
s_address,
|
||||
ntohs(serve->bind_to.v4.sin_port));
|
||||
break;
|
||||
}
|
||||
else {
|
||||
|
||||
warn( "Couldn't bind to %s port %d: %s",
|
||||
s_address,
|
||||
ntohs(serve->bind_to.v4.sin_port),
|
||||
strerror( errno ) );
|
||||
|
||||
switch (errno){
|
||||
/* bind() can give us EACCES,
|
||||
* EADDRINUSE, EADDRNOTAVAIL, EBADF,
|
||||
* EINVAL or ENOTSOCK.
|
||||
*
|
||||
* Any of these other than EACCES,
|
||||
* EADDRINUSE or EADDRNOTAVAIL signify
|
||||
* that there's a logic error somewhere.
|
||||
*/
|
||||
case EACCES:
|
||||
case EADDRINUSE:
|
||||
case EADDRNOTAVAIL:
|
||||
debug("retrying");
|
||||
sleep(1);
|
||||
continue;
|
||||
default:
|
||||
fatal( "Giving up" );
|
||||
}
|
||||
}
|
||||
} while ( 1 );
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Prepares a listening socket for the NBD server, binding etc. */
|
||||
void serve_open_server_socket(struct server* params)
|
||||
{
|
||||
NULLCHECK( params );
|
||||
|
||||
int optval=1;
|
||||
|
||||
params->server_fd = socket(params->bind_to.generic.sa_family == AF_INET ?
|
||||
PF_INET : PF_INET6, SOCK_STREAM, 0);
|
||||
|
||||
FATAL_IF_NEGATIVE(params->server_fd,
|
||||
"Couldn't create server socket");
|
||||
FATAL_IF_NEGATIVE( params->server_fd, "Couldn't create server socket" );
|
||||
|
||||
/* We need SO_REUSEADDR so that when we switch from listening to
|
||||
* serving we don't have to change address if we don't want to.
|
||||
@@ -270,8 +198,7 @@ void serve_open_server_socket(struct server* params)
|
||||
* we barf.
|
||||
*/
|
||||
FATAL_IF_NEGATIVE(
|
||||
setsockopt(params->server_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)),
|
||||
"Couldn't set SO_REUSEADDR"
|
||||
sock_set_reuseaddr( params->server_fd, 1 ), "Couldn't set SO_REUSEADDR"
|
||||
);
|
||||
|
||||
/* TCP_NODELAY makes everything not be slow. If we can't set
|
||||
@@ -279,14 +206,16 @@ void serve_open_server_socket(struct server* params)
|
||||
* understand.
|
||||
*/
|
||||
FATAL_IF_NEGATIVE(
|
||||
setsockopt(params->server_fd, IPPROTO_TCP, TCP_NODELAY, &optval, sizeof(optval)),
|
||||
"Couldn't set TCP_NODELAY"
|
||||
sock_set_tcp_nodelay( params->server_fd, 1 ), "Couldn't set TCP_NODELAY"
|
||||
);
|
||||
|
||||
/* If we can't bind, presumably that's because someone else is
|
||||
* squatting on our ip/port combo, or the ip isn't yet
|
||||
* configured. Ideally we want to retry this. */
|
||||
serve_bind(params);
|
||||
FATAL_UNLESS_ZERO(
|
||||
sock_try_bind( params->server_fd, ¶ms->bind_to.generic ),
|
||||
SHOW_ERRNO( "Failed to bind() socket" )
|
||||
);
|
||||
|
||||
FATAL_IF_NEGATIVE(
|
||||
listen(params->server_fd, params->tcp_backlog),
|
||||
@@ -304,33 +233,29 @@ int tryjoin_client_thread( struct client_tbl_entry *entry, int (*joinfunc)(pthre
|
||||
|
||||
int was_closed = 0;
|
||||
void * status=NULL;
|
||||
int join_errno;
|
||||
|
||||
if (entry->thread != 0) {
|
||||
char s_client_address[64];
|
||||
char s_client_address[128];
|
||||
|
||||
memset(s_client_address, 0, 64);
|
||||
strcpy(s_client_address, "???");
|
||||
inet_ntop( entry->address.generic.sa_family,
|
||||
sockaddr_address_data(&entry->address.generic),
|
||||
s_client_address,
|
||||
64 );
|
||||
sockaddr_address_string( &entry->address.generic, &s_client_address[0], 128 );
|
||||
|
||||
debug( "%s(%p,...)", joinfunc == pthread_join ? "joining" : "tryjoining", entry->thread );
|
||||
join_errno = joinfunc(entry->thread, &status);
|
||||
int join_errno = joinfunc(entry->thread, &status);
|
||||
|
||||
/* join_errno can legitimately be ESRCH if the thread is
|
||||
* already dead, but the client still needs tidying up. */
|
||||
if (join_errno != 0 && !entry->client->stopped ) {
|
||||
debug( "join_errno was %s, stopped was %d", strerror( join_errno ), entry->client->stopped );
|
||||
FATAL_UNLESS( join_errno == EBUSY,
|
||||
"Problem with joining thread %p: %s",
|
||||
entry->thread,
|
||||
strerror(join_errno) );
|
||||
}
|
||||
else {
|
||||
else if ( join_errno == 0 ) {
|
||||
debug("nbd thread %016x exited (%s) with status %ld",
|
||||
entry->thread,
|
||||
s_client_address,
|
||||
(uint64_t)status);
|
||||
(uintptr_t)status);
|
||||
client_destroy( entry->client );
|
||||
entry->client = NULL;
|
||||
entry->thread = 0;
|
||||
@@ -401,6 +326,20 @@ int cleanup_and_find_client_slot(struct server* params)
|
||||
return slot;
|
||||
}
|
||||
|
||||
int server_count_clients( struct server *params )
|
||||
{
|
||||
NULLCHECK( params );
|
||||
int i, count = 0;
|
||||
|
||||
for ( i = 0 ; i < params->max_nbd_clients ; i++ ) {
|
||||
if ( params->nbd_client[i].thread != 0 ) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
/** Check whether the address client_address is allowed or not according
|
||||
* to the current acl. If params->acl is NULL, the result will be 1,
|
||||
@@ -435,9 +374,11 @@ int server_should_accept_client(
|
||||
NULLCHECK( client_address );
|
||||
NULLCHECK( s_client_address );
|
||||
|
||||
if (inet_ntop(client_address->generic.sa_family,
|
||||
sockaddr_address_data(&client_address->generic),
|
||||
s_client_address, s_client_address_len ) == NULL) {
|
||||
const char* result = sockaddr_address_string(
|
||||
&client_address->generic, s_client_address, s_client_address_len
|
||||
);
|
||||
|
||||
if ( NULL == result ) {
|
||||
warn( "Rejecting client %s: Bad client_address", s_client_address );
|
||||
return 0;
|
||||
}
|
||||
@@ -483,18 +424,22 @@ void accept_nbd_client(
|
||||
|
||||
|
||||
if ( !server_should_accept_client( params, client_address, s_client_address, 64 ) ) {
|
||||
close( client_fd );
|
||||
FATAL_IF_NEGATIVE( close( client_fd ),
|
||||
"Error closing client socket fd %d", client_fd );
|
||||
debug("Closed client socket fd %d", client_fd);
|
||||
return;
|
||||
}
|
||||
|
||||
slot = cleanup_and_find_client_slot(params);
|
||||
if (slot < 0) {
|
||||
warn("too many clients to accept connection");
|
||||
close(client_fd);
|
||||
FATAL_IF_NEGATIVE( close( client_fd ),
|
||||
"Error closing client socket fd %d", client_fd );
|
||||
debug("Closed client socket fd %d", client_fd);
|
||||
return;
|
||||
}
|
||||
|
||||
debug( "Client %s accepted.", s_client_address );
|
||||
info( "Client %s accepted on fd %d.", s_client_address, client_fd );
|
||||
client_params = client_create( params, client_fd );
|
||||
|
||||
params->nbd_client[slot].client = client_params;
|
||||
@@ -506,7 +451,9 @@ void accept_nbd_client(
|
||||
if ( 0 != spawn_client_thread( client_params, thread ) ) {
|
||||
debug( "Thread creation problem." );
|
||||
client_destroy( client_params );
|
||||
close(client_fd);
|
||||
FATAL_IF_NEGATIVE( close(client_fd),
|
||||
"Error closing client socket fd %d", client_fd );
|
||||
debug("Closed client socket fd %d", client_fd);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -526,7 +473,7 @@ void server_audit_clients( struct server * serve)
|
||||
* won't have been audited against the later acl. This isn't a
|
||||
* problem though, because in order to update the acl
|
||||
* server_replace_acl must have been called, so the
|
||||
* server_accept ioop will see a second acl_updated signal as
|
||||
* server_accept loop will see a second acl_updated signal as
|
||||
* soon as it hits select, and a second audit will be run.
|
||||
*/
|
||||
for( i = 0; i < serve->max_nbd_clients; i++ ) {
|
||||
@@ -551,7 +498,7 @@ void server_close_clients( struct server *params )
|
||||
|
||||
info("closing all clients");
|
||||
|
||||
int i, j;
|
||||
int i; /* , j; */
|
||||
struct client_tbl_entry *entry;
|
||||
|
||||
for( i = 0; i < params->max_nbd_clients; i++ ) {
|
||||
@@ -562,9 +509,17 @@ void server_close_clients( struct server *params )
|
||||
client_signal_stop( entry->client );
|
||||
}
|
||||
}
|
||||
for( j = 0; j < params->max_nbd_clients; j++ ) {
|
||||
join_client_thread( ¶ms->nbd_client[j] );
|
||||
}
|
||||
/* We don't join the clients here. When we enter the final
|
||||
* mirror pass, we get the IO lock, then wait for the server_fd
|
||||
* to close before sending the data, to be sure that no new
|
||||
* clients can be accepted which might think they've written
|
||||
* to the disc. However, an existing client thread can be
|
||||
* waiting for the IO lock already, so if we try to join it
|
||||
* here, we deadlock.
|
||||
*
|
||||
* The client threads will be joined in serve_cleanup.
|
||||
*
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
@@ -592,19 +547,63 @@ void server_replace_acl( struct server *serve, struct acl * new_acl )
|
||||
}
|
||||
|
||||
|
||||
void server_prevent_mirror_start( struct server *serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
|
||||
serve->mirror_can_start = 0;
|
||||
}
|
||||
|
||||
void server_allow_mirror_start( struct server *serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
|
||||
serve->mirror_can_start = 1;
|
||||
}
|
||||
|
||||
|
||||
/* Only call this with the mirror start lock held */
|
||||
int server_mirror_can_start( struct server *serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
|
||||
return serve->mirror_can_start;
|
||||
}
|
||||
|
||||
|
||||
/* Queries whether we are currently mirroring.  If we are, that has to
 * be communicated via the process exit status, because otherwise the
 * supervisor will assume the migration completed.
 *
 * Runs under the start-mirror lock.  When a mirror is in progress a
 * warning is logged and further mirror starts are prevented.
 *
 * Returns 1 when no mirror was in progress, 0 when one was.
 */
int serve_shutdown_is_graceful( struct server *params )
{
	int graceful = 1;

	server_lock_start_mirror( params );
	if ( server_is_mirroring( params ) ) {
		graceful = 0;
		warn( "Stop signal received while mirroring." );
		server_prevent_mirror_start( params );
	}
	server_unlock_start_mirror( params );

	return graceful;
}
|
||||
|
||||
|
||||
/** Accept either an NBD or control socket connection, dispatch appropriately */
|
||||
int server_accept( struct server * params )
|
||||
{
|
||||
NULLCHECK( params );
|
||||
debug("accept loop starting");
|
||||
int client_fd;
|
||||
union mysockaddr client_address;
|
||||
fd_set fds;
|
||||
socklen_t socklen=sizeof(client_address);
|
||||
/* We select on this fd to receive OS signals (only a few of
|
||||
* which we're interested in, see flexnbd.c */
|
||||
int signal_fd = flexnbd_signal_fd( params->flexnbd );
|
||||
int should_continue = 1;
|
||||
|
||||
FD_ZERO(&fds);
|
||||
FD_SET(params->server_fd, &fds);
|
||||
@@ -612,18 +611,22 @@ int server_accept( struct server * params )
|
||||
self_pipe_fd_set( params->close_signal, &fds );
|
||||
self_pipe_fd_set( params->acl_updated_signal, &fds );
|
||||
|
||||
FATAL_IF_NEGATIVE(select(FD_SETSIZE, &fds,
|
||||
NULL, NULL, NULL), "select() failed");
|
||||
FATAL_IF_NEGATIVE(
|
||||
sock_try_select(FD_SETSIZE, &fds, NULL, NULL, NULL),
|
||||
SHOW_ERRNO( "select() failed" )
|
||||
);
|
||||
|
||||
if ( self_pipe_fd_isset( params->close_signal, &fds ) ){
|
||||
server_close_clients( params );
|
||||
return 0;
|
||||
should_continue = 0;
|
||||
}
|
||||
|
||||
|
||||
if ( 0 < signal_fd && FD_ISSET( signal_fd, &fds ) ){
|
||||
debug( "Stop signal received." );
|
||||
server_close_clients( params );
|
||||
return 0;
|
||||
params->success = params->success && serve_shutdown_is_graceful( params );
|
||||
should_continue = 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -633,12 +636,18 @@ int server_accept( struct server * params )
|
||||
}
|
||||
|
||||
if ( FD_ISSET( params->server_fd, &fds ) ){
|
||||
client_fd = accept( params->server_fd, &client_address.generic, &socklen );
|
||||
debug("Accepted nbd client socket");
|
||||
int client_fd = accept( params->server_fd, &client_address.generic, &socklen );
|
||||
|
||||
if ( params->allow_new_clients ) {
|
||||
debug("Accepted nbd client socket fd %d", client_fd);
|
||||
accept_nbd_client(params, client_fd, &client_address);
|
||||
} else {
|
||||
debug( "New NBD client socket %d not allowed", client_fd );
|
||||
sock_try_close( client_fd );
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
return should_continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -648,12 +657,48 @@ void serve_accept_loop(struct server* params)
|
||||
while( server_accept( params ) );
|
||||
}
|
||||
|
||||
void* build_allocation_map_thread(void* serve_uncast)
|
||||
{
|
||||
NULLCHECK( serve_uncast );
|
||||
|
||||
struct server* serve = (struct server*) serve_uncast;
|
||||
|
||||
NULLCHECK( serve->filename );
|
||||
NULLCHECK( serve->allocation_map );
|
||||
|
||||
int fd = open( serve->filename, O_RDONLY );
|
||||
FATAL_IF_NEGATIVE( fd, "Couldn't open %s", serve->filename );
|
||||
|
||||
if ( build_allocation_map( serve->allocation_map, fd ) ) {
|
||||
serve->allocation_map_built = 1;
|
||||
}
|
||||
else {
|
||||
/* We can operate without it, but we can't free it without a race.
|
||||
* All that happens if we leave it is that it gradually builds up an
|
||||
* *incomplete* record of writes. Nobody will use it, as
|
||||
* allocation_map_built == 0 for the lifetime of the process.
|
||||
*
|
||||
* The stream functionality can still be relied on. We don't need to
|
||||
* worry about mirroring waiting for the allocation map to finish,
|
||||
* because we already copy every byte at least once. If that changes in
|
||||
* the future, we'll need to wait for the allocation map to finish or
|
||||
* fail before we can complete the migration.
|
||||
*/
|
||||
serve->allocation_map_not_built = 1;
|
||||
warn( "Didn't build allocation map for %s", serve->filename );
|
||||
}
|
||||
|
||||
close( fd );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/** Initialisation function that sets up the initial allocation map, i.e. so
|
||||
* we know which blocks of the file are allocated.
|
||||
*/
|
||||
void serve_init_allocation_map(struct server* params)
|
||||
{
|
||||
NULLCHECK( params );
|
||||
NULLCHECK( params->filename );
|
||||
|
||||
int fd = open( params->filename, O_RDONLY );
|
||||
off64_t size;
|
||||
@@ -663,12 +708,52 @@ void serve_init_allocation_map(struct server* params)
|
||||
params->size = size;
|
||||
FATAL_IF_NEGATIVE( size, "Couldn't find size of %s",
|
||||
params->filename );
|
||||
|
||||
params->allocation_map =
|
||||
build_allocation_map(fd, size, block_allocation_resolution);
|
||||
close(fd);
|
||||
bitset_alloc( params->size, block_allocation_resolution );
|
||||
|
||||
int ok = pthread_create( ¶ms->allocation_map_builder_thread,
|
||||
NULL,
|
||||
build_allocation_map_thread,
|
||||
params );
|
||||
|
||||
FATAL_IF_NEGATIVE( ok, "Couldn't create thread" );
|
||||
}
|
||||
|
||||
|
||||
void server_forbid_new_clients( struct server * serve )
|
||||
{
|
||||
serve->allow_new_clients = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
void server_allow_new_clients( struct server * serve )
|
||||
{
|
||||
serve->allow_new_clients = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
void server_join_clients( struct server * serve ) {
|
||||
int i;
|
||||
void* status;
|
||||
|
||||
for (i=0; i < serve->max_nbd_clients; i++) {
|
||||
pthread_t thread_id = serve->nbd_client[i].thread;
|
||||
|
||||
if (thread_id != 0) {
|
||||
debug( "joining thread %p", thread_id );
|
||||
int err = pthread_join( thread_id, &status );
|
||||
if ( 0 == err ) {
|
||||
serve->nbd_client[i].thread = 0;
|
||||
} else {
|
||||
warn( "Error %s (%i) joining thread %p", strerror( err ), err, thread_id );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* Tell the server to close all the things. */
|
||||
void serve_signal_close( struct server * serve )
|
||||
{
|
||||
@@ -677,6 +762,7 @@ void serve_signal_close( struct server * serve )
|
||||
self_pipe_signal( serve->close_signal );
|
||||
}
|
||||
|
||||
|
||||
/* Block until the server closes the server_fd.
|
||||
*/
|
||||
void serve_wait_for_close( struct server * serve )
|
||||
@@ -686,55 +772,73 @@ void serve_wait_for_close( struct server * serve )
|
||||
}
|
||||
}
|
||||
|
||||
/* We've just had an ENTRUST/DISCONNECT pair, so we need to shut down
|
||||
/* We've just had a DISCONNECT, so we need to shut down
|
||||
* and signal our listener that we can safely take over.
|
||||
*/
|
||||
void server_control_arrived( struct server *serve )
|
||||
{
|
||||
debug( "server_control_arrived" );
|
||||
NULLCHECK( serve );
|
||||
|
||||
serve->has_control = 1;
|
||||
if ( !serve->success ) {
|
||||
serve->success = 1;
|
||||
serve_signal_close( serve );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void flexnbd_stop_control( struct flexnbd * flexnbd );
|
||||
|
||||
/** Closes sockets, frees memory and waits for all client threads to finish */
|
||||
void serve_cleanup(struct server* params,
|
||||
int fatal __attribute__ ((unused)) )
|
||||
{
|
||||
NULLCHECK( params );
|
||||
void* status;
|
||||
|
||||
info("cleaning up");
|
||||
|
||||
int i;
|
||||
|
||||
if (params->server_fd){ close(params->server_fd); }
|
||||
|
||||
/* need to stop background build if we're killed very early on */
|
||||
pthread_cancel(params->allocation_map_builder_thread);
|
||||
pthread_join(params->allocation_map_builder_thread, &status);
|
||||
|
||||
int need_mirror_lock;
|
||||
need_mirror_lock = !server_start_mirror_locked( params );
|
||||
|
||||
if ( need_mirror_lock ) { server_lock_start_mirror( params ); }
|
||||
{
|
||||
if ( server_is_mirroring( params ) ) {
|
||||
server_abandon_mirror( params );
|
||||
}
|
||||
server_prevent_mirror_start( params );
|
||||
}
|
||||
if ( need_mirror_lock ) { server_unlock_start_mirror( params ); }
|
||||
|
||||
server_join_clients( params );
|
||||
|
||||
if (params->allocation_map) {
|
||||
free(params->allocation_map);
|
||||
bitset_free( params->allocation_map );
|
||||
}
|
||||
|
||||
if (params->mirror_super) {
|
||||
/* AWOOGA! RACE! */
|
||||
pthread_t mirror_t = params->mirror_super->thread;
|
||||
params->mirror->signal_abandon = 1;
|
||||
pthread_join( mirror_t, NULL );
|
||||
}
|
||||
|
||||
for (i=0; i < params->max_nbd_clients; i++) {
|
||||
void* status;
|
||||
pthread_t thread_id = params->nbd_client[i].thread;
|
||||
|
||||
if (thread_id != 0) {
|
||||
debug("joining thread %p", thread_id);
|
||||
pthread_join(thread_id, &status);
|
||||
}
|
||||
if ( server_start_mirror_locked( params ) ) {
|
||||
server_unlock_start_mirror( params );
|
||||
}
|
||||
|
||||
if ( server_acl_locked( params ) ) {
|
||||
server_unlock_acl( params );
|
||||
}
|
||||
|
||||
/* if( params->flexnbd ) { */
|
||||
/* if ( params->flexnbd->control ) { */
|
||||
/* flexnbd_stop_control( params->flexnbd ); */
|
||||
/* } */
|
||||
/* flexnbd_destroy( params->flexnbd ); */
|
||||
/* } */
|
||||
|
||||
/* server_destroy( params ); */
|
||||
|
||||
debug( "Cleanup done");
|
||||
}
|
||||
|
||||
@@ -742,7 +846,83 @@ void serve_cleanup(struct server* params,
|
||||
int server_is_in_control( struct server *serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
return serve->has_control;
|
||||
return serve->success;
|
||||
}
|
||||
|
||||
int server_is_mirroring( struct server * serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
return !!serve->mirror_super;
|
||||
}
|
||||
|
||||
uint64_t server_mirror_bytes_remaining( struct server * serve )
|
||||
{
|
||||
if ( server_is_mirroring( serve ) ) {
|
||||
uint64_t bytes_to_xfer =
|
||||
bitset_stream_queued_bytes( serve->allocation_map, BITSET_STREAM_SET ) +
|
||||
( serve->size - serve->mirror->offset );
|
||||
|
||||
return bytes_to_xfer;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Given historic bps measurements and number of bytes left to transfer, give
|
||||
* an estimate of how many seconds are remaining before the migration is
|
||||
* complete, assuming no new bytes are written.
|
||||
*/
|
||||
|
||||
uint64_t server_mirror_eta( struct server * serve )
|
||||
{
|
||||
if ( server_is_mirroring( serve ) ) {
|
||||
uint64_t bytes_to_xfer = server_mirror_bytes_remaining( serve );
|
||||
return bytes_to_xfer / ( server_mirror_bps( serve ) + 1 );
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t server_mirror_bps( struct server * serve )
|
||||
{
|
||||
if ( server_is_mirroring( serve ) ) {
|
||||
uint64_t duration_ms =
|
||||
monotonic_time_ms() - serve->mirror->migration_started;
|
||||
|
||||
return serve->mirror->all_dirty / ( ( duration_ms / 1000 ) + 1 );
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void mirror_super_destroy( struct mirror_super * super );
|
||||
|
||||
/* This must only be called with the start_mirror lock held */
|
||||
void server_abandon_mirror( struct server * serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
if ( serve->mirror_super ) {
|
||||
/* FIXME: AWOOGA! RACE!
|
||||
* We can set abandon_signal after mirror_super has checked it, but
|
||||
* before the reset. However, mirror_reset doesn't clear abandon_signal
|
||||
* so it'll just terminate early on the next pass. */
|
||||
ERROR_UNLESS(
|
||||
self_pipe_signal( serve->mirror->abandon_signal ),
|
||||
"Failed to signal abandon to mirror"
|
||||
);
|
||||
|
||||
pthread_t tid = serve->mirror_super->thread;
|
||||
pthread_join( tid, NULL );
|
||||
debug( "Mirror thread %p pthread_join returned", tid );
|
||||
|
||||
server_allow_mirror_start( serve );
|
||||
mirror_super_destroy( serve->mirror_super );
|
||||
|
||||
serve->mirror = NULL;
|
||||
serve->mirror_super = NULL;
|
||||
|
||||
debug( "Mirror supervisor done." );
|
||||
}
|
||||
}
|
||||
|
||||
int server_default_deny( struct server * serve )
|
||||
@@ -752,19 +932,23 @@ int server_default_deny( struct server * serve )
|
||||
}
|
||||
|
||||
/** Full lifecycle of the server */
|
||||
int do_serve(struct server* params)
|
||||
int do_serve( struct server* params, struct self_pipe * open_signal )
|
||||
{
|
||||
NULLCHECK( params );
|
||||
|
||||
int has_control;
|
||||
int success;
|
||||
|
||||
error_set_handler((cleanup_handler*) serve_cleanup, params);
|
||||
serve_open_server_socket(params);
|
||||
|
||||
/* Only signal that we are open for business once the server
|
||||
socket is open */
|
||||
if ( NULL != open_signal ) { self_pipe_signal( open_signal ); }
|
||||
|
||||
serve_init_allocation_map(params);
|
||||
serve_accept_loop(params);
|
||||
has_control = params->has_control;
|
||||
success = params->success;
|
||||
serve_cleanup(params, 0);
|
||||
|
||||
return has_control;
|
||||
return success;
|
||||
}
|
||||
|
167
src/server/serve.h
Normal file
167
src/server/serve.h
Normal file
@@ -0,0 +1,167 @@
|
||||
#ifndef SERVE_H
|
||||
#define SERVE_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h> /* for sig_atomic_t */
|
||||
|
||||
#include "flexnbd.h"
|
||||
#include "parse.h"
|
||||
#include "acl.h"
|
||||
|
||||
|
||||
static const int block_allocation_resolution = 4096;//128<<10;
|
||||
|
||||
|
||||
struct client_tbl_entry {
|
||||
pthread_t thread;
|
||||
union mysockaddr address;
|
||||
struct client * client;
|
||||
};
|
||||
|
||||
|
||||
#define MAX_NBD_CLIENTS 16
|
||||
struct server {
|
||||
/* The flexnbd wrapper this server is attached to */
|
||||
struct flexnbd * flexnbd;
|
||||
|
||||
/** address/port to bind to */
|
||||
union mysockaddr bind_to;
|
||||
/** (static) file name to serve */
|
||||
char* filename;
|
||||
/** TCP backlog for listen() */
|
||||
int tcp_backlog;
|
||||
/** (static) file name of UNIX control socket (or NULL if none) */
|
||||
char* control_socket_name;
|
||||
/** size of file */
|
||||
uint64_t size;
|
||||
|
||||
/** to interrupt accept loop and clients, write() to close_signal[1] */
|
||||
struct self_pipe * close_signal;
|
||||
|
||||
/** access control list */
|
||||
struct acl * acl;
|
||||
/** acl_updated_signal will be signalled after the acl struct
|
||||
* has been replaced
|
||||
*/
|
||||
struct self_pipe * acl_updated_signal;
|
||||
|
||||
/* Claimed around any updates to the ACL. */
|
||||
struct flexthread_mutex * l_acl;
|
||||
|
||||
/* Claimed around starting a mirror so that it doesn't race with
|
||||
* shutting down on a SIGTERM. */
|
||||
struct flexthread_mutex * l_start_mirror;
|
||||
|
||||
struct mirror* mirror;
|
||||
struct mirror_super * mirror_super;
|
||||
/* This is used to stop the mirror from starting after we
|
||||
* receive a SIGTERM */
|
||||
int mirror_can_start;
|
||||
|
||||
int server_fd;
|
||||
int control_fd;
|
||||
|
||||
/* the allocation_map keeps track of which blocks in the backing file
|
||||
* have been allocated, or part-allocated on disc, with unallocated
|
||||
* blocks presumed to contain zeroes (i.e. represented as sparse files
|
||||
* by the filesystem). We can use this information when receiving
|
||||
* incoming writes, and avoid writing zeroes to unallocated sections
|
||||
* of the file which would needlessly increase disc usage. This
|
||||
* bitmap will start at all-zeroes for an empty file, and tend towards
|
||||
* all-ones as the file is written to (i.e. we assume that allocated
|
||||
* blocks can never become unallocated again, as is the case with ext3
|
||||
* at least).
|
||||
*/
|
||||
struct bitset * allocation_map;
|
||||
/* when starting up, this thread builds the allocation_map */
|
||||
pthread_t allocation_map_builder_thread;
|
||||
|
||||
/* when the thread has finished, it sets this to 1 */
|
||||
volatile sig_atomic_t allocation_map_built;
|
||||
volatile sig_atomic_t allocation_map_not_built;
|
||||
|
||||
int max_nbd_clients;
|
||||
struct client_tbl_entry *nbd_client;
|
||||
|
||||
/** Should clients use the killswitch? */
|
||||
int use_killswitch;
|
||||
|
||||
/** If this isn't set, newly accepted clients will be closed immediately */
|
||||
int allow_new_clients;
|
||||
|
||||
/* Marker for whether this server has control over the data in
|
||||
* the file, or if we're waiting to receive it from an inbound
|
||||
* migration which hasn't yet finished.
|
||||
*
|
||||
* It's the value which controls the exit status of a serve or
|
||||
* listen process.
|
||||
*/
|
||||
int success;
|
||||
};
|
||||
|
||||
struct server * server_create(
|
||||
struct flexnbd * flexnbd,
|
||||
char* s_ip_address,
|
||||
char* s_port,
|
||||
char* s_file,
|
||||
int default_deny,
|
||||
int acl_entries,
|
||||
char** s_acl_entries,
|
||||
int max_nbd_clients,
|
||||
int use_killswitch,
|
||||
int success );
|
||||
void server_destroy( struct server * );
|
||||
int server_is_closed(struct server* serve);
|
||||
void serve_signal_close( struct server *serve );
|
||||
void serve_wait_for_close( struct server * serve );
|
||||
void server_replace_acl( struct server *serve, struct acl * acl);
|
||||
void server_control_arrived( struct server *serve );
|
||||
int server_is_in_control( struct server *serve );
|
||||
int server_default_deny( struct server * serve );
|
||||
int server_acl_locked( struct server * serve );
|
||||
void server_lock_acl( struct server *serve );
|
||||
void server_unlock_acl( struct server *serve );
|
||||
void server_lock_start_mirror( struct server *serve );
|
||||
void server_unlock_start_mirror( struct server *serve );
|
||||
int server_is_mirroring( struct server * serve );
|
||||
|
||||
uint64_t server_mirror_bytes_remaining( struct server * serve );
|
||||
uint64_t server_mirror_eta( struct server * serve );
|
||||
uint64_t server_mirror_bps( struct server * serve );
|
||||
|
||||
void server_abandon_mirror( struct server * serve );
|
||||
void server_prevent_mirror_start( struct server *serve );
|
||||
void server_allow_mirror_start( struct server *serve );
|
||||
int server_mirror_can_start( struct server *serve );
|
||||
|
||||
/* These three functions are used by mirror around the final pass, to close
|
||||
* existing clients and prevent new ones from being around
|
||||
*/
|
||||
|
||||
void server_forbid_new_clients( struct server *serve );
|
||||
void server_close_clients( struct server *serve );
|
||||
void server_join_clients( struct server *serve );
|
||||
void server_allow_new_clients( struct server *serve );
|
||||
|
||||
/* Returns a count (ish) of the number of currently-running client threads */
|
||||
int server_count_clients( struct server *params );
|
||||
|
||||
void server_unlink( struct server * serve );
|
||||
|
||||
int do_serve( struct server *, struct self_pipe * );
|
||||
|
||||
struct mode_readwrite_params {
|
||||
union mysockaddr connect_to;
|
||||
union mysockaddr connect_from;
|
||||
|
||||
uint64_t from;
|
||||
uint32_t len;
|
||||
|
||||
int data_fd;
|
||||
int client;
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
78
src/server/status.c
Normal file
78
src/server/status.c
Normal file
@@ -0,0 +1,78 @@
|
||||
#include "status.h"
|
||||
#include "serve.h"
|
||||
#include "util.h"
|
||||
|
||||
struct status * status_create( struct server * serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
struct status * status;
|
||||
|
||||
status = xmalloc( sizeof( struct status ) );
|
||||
status->pid = getpid();
|
||||
status->size = serve->size;
|
||||
status->has_control = serve->success;
|
||||
|
||||
status->clients_allowed = serve->allow_new_clients;
|
||||
status->num_clients = server_count_clients( serve );
|
||||
|
||||
server_lock_start_mirror( serve );
|
||||
|
||||
status->is_mirroring = NULL != serve->mirror;
|
||||
if ( status->is_mirroring ) {
|
||||
status->migration_duration = monotonic_time_ms();
|
||||
|
||||
if ( ( serve->mirror->migration_started ) < status->migration_duration ) {
|
||||
status->migration_duration -= serve->mirror->migration_started;
|
||||
} else {
|
||||
status->migration_duration = 0;
|
||||
}
|
||||
status->migration_duration /= 1000;
|
||||
status->migration_speed = server_mirror_bps( serve );
|
||||
status->migration_speed_limit = serve->mirror->max_bytes_per_second;
|
||||
|
||||
status->migration_seconds_left = server_mirror_eta( serve );
|
||||
}
|
||||
|
||||
server_unlock_start_mirror( serve );
|
||||
|
||||
return status;
|
||||
|
||||
}
|
||||
|
||||
#define BOOL_S(var) (var ? "true" : "false" )
|
||||
#define PRINT_BOOL( var ) \
|
||||
do{dprintf( fd, #var "=%s ", BOOL_S( status->var ) );}while(0)
|
||||
#define PRINT_INT( var ) \
|
||||
do{dprintf( fd, #var "=%d ", status->var );}while(0)
|
||||
#define PRINT_UINT64( var ) \
|
||||
do{dprintf( fd, #var "=%"PRIu64" ", status->var );}while(0)
|
||||
|
||||
int status_write( struct status * status, int fd )
|
||||
{
|
||||
PRINT_INT( pid );
|
||||
PRINT_UINT64( size );
|
||||
PRINT_BOOL( is_mirroring );
|
||||
PRINT_BOOL( clients_allowed );
|
||||
PRINT_INT( num_clients );
|
||||
PRINT_BOOL( has_control );
|
||||
|
||||
if ( status->is_mirroring ) {
|
||||
PRINT_UINT64( migration_speed );
|
||||
PRINT_UINT64( migration_duration );
|
||||
PRINT_UINT64( migration_seconds_left );
|
||||
if ( status->migration_speed_limit < UINT64_MAX ) {
|
||||
PRINT_UINT64( migration_speed_limit );
|
||||
};
|
||||
}
|
||||
|
||||
dprintf(fd, "\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
void status_destroy( struct status * status )
|
||||
{
|
||||
NULLCHECK( status );
|
||||
free( status );
|
||||
}
|
||||
|
@@ -17,6 +17,12 @@
|
||||
*
|
||||
* The following status fields are defined:
|
||||
*
|
||||
* pid:
|
||||
* The current process ID.
|
||||
*
|
||||
* size:
|
||||
* The size of the backing file being served, in bytes.
|
||||
*
|
||||
* has_control:
|
||||
* This will be false when the server is listening for an incoming
|
||||
* migration. It will switch to true when the end-of-migration
|
||||
@@ -24,21 +30,60 @@
|
||||
* If the server is started in "serve" mode, this will never be
|
||||
* false.
|
||||
*
|
||||
* clients_allowed:
|
||||
* This will be false if the server is not currently allowing new
|
||||
* connections, for instance, if we're in the migration endgame.
|
||||
*
|
||||
* num_clients:
|
||||
* This tells us how many clients are currently running. If we're in the
|
||||
* migration endgame, it should be 0
|
||||
*
|
||||
* is_mirroring:
|
||||
* This will be false when the server is started in either "listen"
|
||||
* or "serve" mode. It will become true when a server in "serve"
|
||||
* mode starts a migration, and will become false again when the
|
||||
* migration terminates, successfully or not.
|
||||
* If the server is currently in "listen" mode, this will never b
|
||||
* If the server is currently in "listen" mode, this will never be
|
||||
* true.
|
||||
*
|
||||
*
|
||||
* If is_mirroring is true, then a number of other attributes may appear,
|
||||
* relating to the progress of the migration.
|
||||
*
|
||||
* migration_duration:
|
||||
* How long the migration has been running for, in seconds.
|
||||
*
|
||||
* migration_speed:
|
||||
* Network transfer speed, in bytes/second. This only takes dirty bytes
|
||||
* into account.
|
||||
*
|
||||
* migration_speed_limit:
|
||||
* If set, the speed we're going to try to limit the migration to.
|
||||
*
|
||||
* migration_seconds_left:
|
||||
* Our current best estimate of how many seconds are left before the migration
|
||||
* is finished.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "serve.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
struct status {
|
||||
pid_t pid;
|
||||
uint64_t size;
|
||||
int has_control;
|
||||
int clients_allowed;
|
||||
int num_clients;
|
||||
int is_mirroring;
|
||||
|
||||
uint64_t migration_duration;
|
||||
uint64_t migration_speed;
|
||||
uint64_t migration_speed_limit;
|
||||
uint64_t migration_seconds_left;
|
||||
};
|
||||
|
||||
/** Create a status object for the given server. */
|
||||
@@ -53,3 +98,4 @@ void status_destroy( struct status * );
|
||||
|
||||
|
||||
#endif
|
||||
|
34
src/status.c
34
src/status.c
@@ -1,34 +0,0 @@
|
||||
#include "status.h"
|
||||
#include "serve.h"
|
||||
#include "util.h"
|
||||
|
||||
struct status * status_create( struct server * serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
struct status * status;
|
||||
|
||||
status = xmalloc( sizeof( struct status ) );
|
||||
status->has_control = serve->has_control;
|
||||
status->is_mirroring = NULL != serve->mirror;
|
||||
return status;
|
||||
|
||||
}
|
||||
|
||||
#define BOOL_S(var) (var ? "true" : "false" )
|
||||
#define PRINT_FIELD( var ) \
|
||||
do{dprintf( fd, #var "=%s ", BOOL_S( status->var ) );}while(0)
|
||||
|
||||
int status_write( struct status * status, int fd )
|
||||
{
|
||||
PRINT_FIELD( is_mirroring );
|
||||
PRINT_FIELD( has_control );
|
||||
dprintf(fd, "\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
void status_destroy( struct status * status )
|
||||
{
|
||||
NULLCHECK( status );
|
||||
free( status );
|
||||
}
|
13
tests/acceptance/custom.supp
Normal file
13
tests/acceptance/custom.supp
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
avoid_glibc_bug_do_lookup
|
||||
Memcheck:Addr8
|
||||
fun:do_lookup_x
|
||||
obj:*
|
||||
fun:_dl_lookup_symbol_x
|
||||
}
|
||||
{
|
||||
avoid_glibc_bug_check_match
|
||||
Memcheck:Addr8
|
||||
fun:check_match.12149
|
||||
}
|
||||
|
@@ -5,7 +5,7 @@ require 'file_writer'
|
||||
|
||||
class Environment
|
||||
attr_reader( :blocksize, :filename1, :filename2, :ip,
|
||||
:port1, :port2, :nbd1, :nbd2, :file1, :file2, :rebind_port1 )
|
||||
:port1, :port2, :nbd1, :nbd2, :file1, :file2 )
|
||||
|
||||
def initialize
|
||||
@blocksize = 1024
|
||||
@@ -14,15 +14,25 @@ class Environment
|
||||
@ip = "127.0.0.1"
|
||||
@available_ports = [*40000..41000] - listening_ports
|
||||
@port1 = @available_ports.shift
|
||||
@rebind_port1 = @available_ports.shift
|
||||
@port2 = @available_ports.shift
|
||||
@rebind_port2 = @available_ports.shift
|
||||
@nbd1 = FlexNBD.new("../../build/flexnbd", @ip, @port1, @ip, @rebind_port1)
|
||||
@nbd2 = FlexNBD.new("../../build/flexnbd", @ip, @port2, @ip, @rebind_port2)
|
||||
@nbd1 = FlexNBD::FlexNBD.new("../../build/flexnbd", @ip, @port1)
|
||||
@nbd2 = FlexNBD::FlexNBD.new("../../build/flexnbd", @ip, @port2)
|
||||
|
||||
@fake_pid = nil
|
||||
end
|
||||
|
||||
def prefetch_proxy!
|
||||
@nbd1.prefetch_proxy = true
|
||||
@nbd2.prefetch_proxy = true
|
||||
end
|
||||
|
||||
def proxy1(port=@port2)
|
||||
@nbd1.proxy(@ip, port)
|
||||
end
|
||||
def proxy2(port=@port1)
|
||||
@nbd2.proxy(@ip, port)
|
||||
end
|
||||
|
||||
|
||||
def serve1(*acl)
|
||||
@nbd1.serve(@filename1, *acl)
|
||||
@@ -42,6 +52,10 @@ class Environment
|
||||
end
|
||||
|
||||
|
||||
def break1
|
||||
@nbd1.break
|
||||
end
|
||||
|
||||
def acl1( *acl )
|
||||
@nbd1.acl( *acl )
|
||||
end
|
||||
@@ -69,6 +83,14 @@ class Environment
|
||||
@nbd1.mirror_unchecked( @nbd2.ip, @nbd2.port, nil, nil, 10 )
|
||||
end
|
||||
|
||||
def mirror12_unlink
|
||||
@nbd1.mirror_unlink( @nbd2.ip, @nbd2.port, 2 )
|
||||
end
|
||||
|
||||
|
||||
def write1( data )
|
||||
@nbd1.write( 0, data )
|
||||
end
|
||||
|
||||
def writefile1(data)
|
||||
@file1 = FileWriter.new(@filename1, @blocksize).write(data)
|
||||
@@ -111,20 +133,19 @@ class Environment
|
||||
end
|
||||
|
||||
|
||||
def run_fake( name, addr, port, rebind_addr = addr, rebind_port = port )
|
||||
def run_fake( name, addr, port, sock=nil )
|
||||
fakedir = File.join( File.dirname( __FILE__ ), "fakes" )
|
||||
fake = Dir[File.join( fakedir, name ) + "*"].sort.find { |fn|
|
||||
fakeglob = File.join( fakedir, name ) + "*"
|
||||
fake = Dir[fakeglob].sort.find { |fn|
|
||||
File.executable?( fn )
|
||||
}
|
||||
|
||||
raise "no fake executable" unless fake
|
||||
raise "no fake executable at #{fakeglob}" unless fake
|
||||
raise "no addr" unless addr
|
||||
raise "no port" unless port
|
||||
raise "no rebind_addr" unless rebind_addr
|
||||
raise "no rebind_port" unless rebind_port
|
||||
|
||||
@fake_pid = fork do
|
||||
exec [fake, addr, port, @nbd1.pid, rebind_addr, rebind_port].map{|x| x.to_s}.join(" ")
|
||||
exec [fake, addr, port, @nbd1.pid, sock].map{|x| x.to_s}.join(" ")
|
||||
end
|
||||
sleep(0.5)
|
||||
end
|
||||
|
35
tests/acceptance/fakes/dest/break_after_hello.rb
Executable file
35
tests/acceptance/fakes/dest/break_after_hello.rb
Executable file
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env ruby
|
||||
# encoding: utf-8
|
||||
|
||||
# Open a server, accept a client, then cancel the migration by issuing
|
||||
# a break command.
|
||||
|
||||
require 'flexnbd/fake_dest'
|
||||
include FlexNBD
|
||||
|
||||
addr, port, src_pid, sock = *ARGV
|
||||
server = FakeDest.new( addr, port )
|
||||
client = server.accept
|
||||
|
||||
ctrl = UNIXSocket.open( sock )
|
||||
|
||||
Process.kill("STOP", src_pid.to_i)
|
||||
ctrl.write( "break\n" )
|
||||
ctrl.close_write
|
||||
client.write_hello
|
||||
Process.kill("CONT", src_pid.to_i)
|
||||
|
||||
fail "Unexpected control response" unless
|
||||
ctrl.read =~ /0: mirror stopped/
|
||||
|
||||
client2 = nil
|
||||
begin
|
||||
client2 = server.accept( "Expected timeout" )
|
||||
fail "Unexpected reconnection"
|
||||
rescue Timeout::Error
|
||||
# expected
|
||||
end
|
||||
client.close
|
||||
|
||||
exit(0)
|
||||
|
@@ -1,29 +0,0 @@
|
||||
#!/usr/bin/env ruby
|
||||
# encoding: utf-8
|
||||
|
||||
# Open a server, accept a client, then we expect a single write
|
||||
# followed by an entrust. Disconnect after the entrust. We expect a
|
||||
# reconnection followed by a full mirror.
|
||||
|
||||
require 'flexnbd/fake_dest'
|
||||
include FlexNBD
|
||||
|
||||
addr, port, src_pid = *ARGV
|
||||
server = FakeDest.new( addr, port )
|
||||
client = server.accept
|
||||
|
||||
client.write_hello
|
||||
write_req = client.read_request
|
||||
data = client.read_data( write_req[:len] )
|
||||
client.write_reply( write_req[:handle], 0 )
|
||||
|
||||
entrust_req = client.read_request
|
||||
fail "Not an entrust" unless entrust_req[:type] == 65536
|
||||
client.close
|
||||
|
||||
client2 = server.accept
|
||||
client2.receive_mirror
|
||||
|
||||
|
||||
exit(0)
|
||||
|
@@ -3,7 +3,8 @@
|
||||
|
||||
# Open a server, accept a client, then we expect a single write
|
||||
# followed by an entrust. However, we disconnect after the write so
|
||||
# the entrust will fail. We expect a reconnection.
|
||||
# the entrust will fail. We don't expect a reconnection: the sender
|
||||
# can't reliably spot a failed send.
|
||||
|
||||
require 'flexnbd/fake_dest'
|
||||
include FlexNBD
|
||||
@@ -21,7 +22,4 @@ client.write_reply( req[:handle], 0 )
|
||||
client.close
|
||||
Process.kill("CONT", src_pid.to_i)
|
||||
|
||||
client2 = server.accept
|
||||
client2.close
|
||||
|
||||
exit(0)
|
||||
|
@@ -1,34 +0,0 @@
|
||||
#!/usr/bin/env ruby
|
||||
# encoding: utf-8
|
||||
|
||||
# Receive a mirror, but respond to the entrust with an error. There's
|
||||
# currently no code path in flexnbd which can do this, but we could
|
||||
# add one.
|
||||
|
||||
require 'flexnbd/fake_dest'
|
||||
include FlexNBD
|
||||
|
||||
addr, port = *ARGV
|
||||
server = FakeDest.new( addr, port )
|
||||
client = server.accept
|
||||
|
||||
client.write_hello
|
||||
loop do
|
||||
req = client.read_request
|
||||
if req[:type] == 1
|
||||
client.read_data( req[:len] )
|
||||
client.write_reply( req[:handle] )
|
||||
else
|
||||
client.write_reply( req[:handle], 1 )
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
client.close
|
||||
|
||||
client2 = server.accept( "Timed out waiting for a reconnection" )
|
||||
|
||||
client2.close
|
||||
server.close
|
||||
|
||||
exit(0)
|
@@ -20,7 +20,13 @@ t = Thread.start do
|
||||
client2.close
|
||||
end
|
||||
|
||||
sleep( FlexNBD::MS_REQUEST_LIMIT_SECS + 2 )
|
||||
sleep_time = if ENV.has_key?('FLEXNBD_MS_REQUEST_LIMIT_SECS')
|
||||
ENV['FLEXNBD_MS_REQUEST_LIMIT_SECS'].to_f
|
||||
else
|
||||
FlexNBD::MS_REQUEST_LIMIT_SECS
|
||||
end
|
||||
|
||||
sleep( sleep_time + 2.0 )
|
||||
client1.close
|
||||
|
||||
t.join
|
||||
|
19
tests/acceptance/fakes/dest/sigterm_after_hello.rb
Executable file
19
tests/acceptance/fakes/dest/sigterm_after_hello.rb
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
# Wait for a sender connection, send a correct hello, then sigterm the
|
||||
# sender. We expect the sender to exit with a status of 6, which is
|
||||
# enforced in the test.
|
||||
|
||||
require 'flexnbd/fake_dest'
|
||||
include FlexNBD
|
||||
|
||||
addr, port, pid = *ARGV
|
||||
server = FakeDest.new( addr, port )
|
||||
client = server.accept( "Timed out waiting for a connection" )
|
||||
client.write_hello
|
||||
|
||||
Process.kill(15, pid.to_i)
|
||||
|
||||
client.close
|
||||
server.close
|
||||
exit 0
|
@@ -3,8 +3,8 @@
|
||||
# Connect, send a migration, entrust then *immediately* disconnect.
|
||||
# This simulates a client which fails while the client is blocked.
|
||||
#
|
||||
# We attempt to reconnect immediately afterwards to prove that we can
|
||||
# retry the mirroring.
|
||||
# In this situation we expect the destination to quit with an error
|
||||
# status.
|
||||
|
||||
require 'flexnbd/fake_source'
|
||||
include FlexNBD
|
||||
@@ -28,7 +28,11 @@ system "kill -CONT #{srv_pid}"
|
||||
|
||||
sleep(0.25)
|
||||
|
||||
client2 = FakeSource.new( addr, port, "Timed out reconnecting" )
|
||||
client2.close
|
||||
begin
|
||||
client2 = FakeSource.new( addr, port, "Expected timeout" )
|
||||
fail "Unexpected reconnection"
|
||||
rescue Timeout::Error
|
||||
# expected
|
||||
end
|
||||
|
||||
exit(0)
|
||||
|
@@ -1,15 +1,14 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
# Connect, send a migration, entrust then *immediately* disconnect.
|
||||
# Connect, send a migration, entrust, read the reply, then disconnect.
|
||||
# This simulates a client which fails while the client is blocked.
|
||||
#
|
||||
# We attempt to reconnect immediately afterwards to prove that we can
|
||||
# retry the mirroring.
|
||||
# We expect the destination to quit with an error status.
|
||||
|
||||
require 'flexnbd/fake_source'
|
||||
include FlexNBD
|
||||
|
||||
addr, port, srv_pid, rebind_addr, rebind_port = *ARGV
|
||||
addr, port, srv_pid = *ARGV
|
||||
|
||||
client = FakeSource.new( addr, port, "Timed out connecting" )
|
||||
client.read_hello
|
||||
@@ -22,11 +21,13 @@ client.close
|
||||
|
||||
sleep(0.25)
|
||||
|
||||
client2 = FakeSource.new( addr, port, "Timed out reconnecting to mirror" )
|
||||
client2.send_mirror
|
||||
|
||||
sleep(1)
|
||||
client3 = FakeSource.new( rebind_addr, rebind_port, "Timed out reconnecting to read" )
|
||||
client3.close
|
||||
begin
|
||||
client2 = FakeSource.new( addr, port, "Expected timeout" )
|
||||
fail "Unexpected reconnection"
|
||||
rescue Timeout::Error
|
||||
# expected
|
||||
end
|
||||
|
||||
exit(0)
|
||||
|
||||
|
@@ -12,10 +12,11 @@ addr, port, srv_pid = *ARGV
|
||||
|
||||
client = FakeSource.new( addr, port, "Timed out connecting" )
|
||||
client.read_hello
|
||||
Process.kill( "STOP", srv_pid.to_i )
|
||||
|
||||
system "kill -STOP #{srv_pid}"
|
||||
client.write_write_request( 0, 8 )
|
||||
client.close
|
||||
Process.kill( "CONT", srv_pid.to_i )
|
||||
system "kill -CONT #{srv_pid}"
|
||||
|
||||
# This sleep ensures that we don't return control to the test runner
|
||||
# too soon, giving the flexnbd process time to fall over if it's going
|
||||
|
@@ -13,13 +13,13 @@ addr, port, srv_pid = *ARGV
|
||||
client = FakeSource.new( addr, port, "Timed out connecting" )
|
||||
client.read_hello
|
||||
|
||||
Process.kill( "STOP", srv_pid.to_i )
|
||||
system "kill -STOP #{srv_pid}"
|
||||
|
||||
client.write_write_request( 0, 8 )
|
||||
client.write_data( "12345678" )
|
||||
client.close
|
||||
|
||||
Process.kill( "CONT", srv_pid.to_i )
|
||||
system "kill -CONT #{srv_pid}"
|
||||
|
||||
# This sleep ensures that we don't return control to the test runner
|
||||
# too soon, giving the flexnbd process time to fall over if it's going
|
||||
|
@@ -13,10 +13,8 @@ addr, port = *ARGV
|
||||
|
||||
client = FakeSource.new( addr, port, "Timed out connecting", "127.0.0.6" )
|
||||
sleep( 0.25 )
|
||||
client.ensure_disconnected
|
||||
|
||||
rsp = client.disconnected? ? 0 : 1
|
||||
client.close
|
||||
exit(0)
|
||||
|
||||
|
||||
exit(rsp)
|
||||
|
||||
|
20
tests/acceptance/fakes/source/sigterm_after_hello.rb
Executable file
20
tests/acceptance/fakes/source/sigterm_after_hello.rb
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
# Connect to the listener, wait for the hello, then sigterm the
|
||||
# listener. We expect the listener to exit with a status of 6, which
|
||||
# is enforced in the test.
|
||||
|
||||
require 'flexnbd/fake_source'
|
||||
include FlexNBD
|
||||
|
||||
addr, port, pid = *ARGV
|
||||
|
||||
client = FakeSource.new( addr, port, "Timed out connecting." )
|
||||
client.read_hello
|
||||
|
||||
Process.kill( "TERM", pid.to_i )
|
||||
|
||||
sleep(0.2)
|
||||
client.close
|
||||
|
||||
exit(0)
|
@@ -1,29 +1,18 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
# Successfully send a migration, but squat on the IP and port which
|
||||
# the destination wants to rebind to. The destination should retry
|
||||
# every second, so we give it up then attempt to connect to the new
|
||||
# server.
|
||||
# Successfully send a migration. This test just makes sure that the
|
||||
# happy path is covered. We expect the destination to quit with a
|
||||
# success status.
|
||||
|
||||
require 'flexnbd/fake_source'
|
||||
include FlexNBD
|
||||
|
||||
addr, port, srv_pid, newaddr, newport = *ARGV
|
||||
|
||||
squatter = TCPServer.open( newaddr, newport.to_i )
|
||||
|
||||
client = FakeSource.new( addr, port, "Timed out connecting" )
|
||||
client.send_mirror()
|
||||
|
||||
sleep(1)
|
||||
|
||||
squatter.close()
|
||||
|
||||
sleep(1)
|
||||
|
||||
client2 = FakeSource.new( newaddr, newport.to_i, "Timed out reconnecting" )
|
||||
client2.read_hello
|
||||
client2.read( 0, 8 )
|
||||
client2.close
|
||||
|
||||
exit( 0 )
|
||||
|
@@ -8,6 +8,10 @@ class FileWriter
|
||||
@pattern = ""
|
||||
end
|
||||
|
||||
def size
|
||||
@blocksize * @pattern.split("").size
|
||||
end
|
||||
|
||||
# We write in fixed block sizes, given by "blocksize"
|
||||
# _ means skip a block
|
||||
# 0 means write a block full of zeroes
|
||||
|
@@ -21,7 +21,7 @@ class ValgrindExecutor
|
||||
attr_reader :pid
|
||||
|
||||
def run( cmd )
|
||||
@pid = fork do exec "valgrind --track-origins=yes #{cmd}" end
|
||||
@pid = fork do exec "valgrind --track-origins=yes --suppressions=custom.supp #{cmd}" end
|
||||
end
|
||||
end # class ValgrindExecutor
|
||||
|
||||
@@ -97,7 +97,9 @@ class ValgrindKillingExecutor
|
||||
when "line"
|
||||
@error.add_line( @text ) if @found
|
||||
when "error", "stack"
|
||||
if @found
|
||||
@killer.call( @error )
|
||||
end
|
||||
when "pid"
|
||||
@error.pid=@text
|
||||
end
|
||||
@@ -129,18 +131,18 @@ class ValgrindKillingExecutor
|
||||
|
||||
def run( cmd )
|
||||
@io_r, io_w = IO.pipe
|
||||
@pid = fork do exec( "valgrind --xml=yes --xml-fd=#{io_w.fileno} " + cmd ) end
|
||||
@pid = fork do exec( "valgrind --suppressions=custom.supp --xml=yes --xml-fd=#{io_w.fileno} " + cmd ) end
|
||||
launch_watch_thread( @pid, @io_r )
|
||||
@pid
|
||||
end
|
||||
|
||||
|
||||
def call( err )
|
||||
Process.kill( "KILL", @pid )
|
||||
$stderr.puts "*"*72
|
||||
$stderr.puts "* Valgrind error spotted:"
|
||||
$stderr.puts err.to_s.split("\n").map{|s| " #{s}"}
|
||||
$stderr.puts "*"*72
|
||||
Process.kill( "KILL", @pid )
|
||||
exit(1)
|
||||
end
|
||||
|
||||
@@ -163,10 +165,11 @@ class ValgrindKillingExecutor
|
||||
end # class ValgrindExecutor
|
||||
|
||||
|
||||
module FlexNBD
|
||||
# Noddy test class to exercise FlexNBD from the outside for testing.
|
||||
#
|
||||
class FlexNBD
|
||||
attr_reader :bin, :ctrl, :pid, :ip, :port, :rebind_ip, :rebind_port
|
||||
attr_reader :bin, :ctrl, :pid, :ip, :port
|
||||
|
||||
class << self
|
||||
def counter
|
||||
@@ -195,7 +198,9 @@ class FlexNBD
|
||||
end
|
||||
end
|
||||
|
||||
def initialize(bin, ip, port, rebind_ip = ip, rebind_port = port)
|
||||
attr_accessor :prefetch_proxy
|
||||
|
||||
def initialize( bin, ip, port )
|
||||
@bin = bin
|
||||
@do_debug = ENV['DEBUG']
|
||||
@debug = build_debug_opt
|
||||
@@ -204,9 +209,8 @@ class FlexNBD
|
||||
@ctrl = "/tmp/.flexnbd.ctrl.#{Time.now.to_i}.#{rand}"
|
||||
@ip = ip
|
||||
@port = port
|
||||
@rebind_ip = rebind_ip
|
||||
@rebind_port = rebind_port
|
||||
@kill = []
|
||||
@prefetch_proxy = false
|
||||
end
|
||||
|
||||
|
||||
@@ -235,13 +239,21 @@ class FlexNBD
|
||||
"--addr #{ip} "\
|
||||
"--port #{port} "\
|
||||
"--file #{file} "\
|
||||
"--rebind-addr #{rebind_ip} " \
|
||||
"--rebind-port #{rebind_port} " \
|
||||
"--sock #{ctrl} "\
|
||||
"#{@debug} "\
|
||||
"#{acl.join(' ')}"
|
||||
end
|
||||
|
||||
def proxy_cmd( connect_ip, connect_port )
|
||||
"#{bin}-proxy "\
|
||||
"--addr #{ip} "\
|
||||
"--port #{port} "\
|
||||
"--conn-addr #{connect_ip} "\
|
||||
"--conn-port #{connect_port} "\
|
||||
"#{prefetch_proxy ? "--cache " : ""}"\
|
||||
"#{@debug}"
|
||||
end
|
||||
|
||||
|
||||
def read_cmd( offset, length )
|
||||
"#{bin} read "\
|
||||
@@ -263,14 +275,36 @@ class FlexNBD
|
||||
end
|
||||
|
||||
|
||||
def mirror_cmd(dest_ip, dest_port)
|
||||
"#{@bin} mirror "\
|
||||
def base_mirror_opts( dest_ip, dest_port )
|
||||
"--addr #{dest_ip} "\
|
||||
"--port #{dest_port} "\
|
||||
"--sock #{ctrl} "\
|
||||
end
|
||||
|
||||
def unlink_mirror_opts( dest_ip, dest_port )
|
||||
"#{base_mirror_opts( dest_ip, dest_port )} "\
|
||||
"--unlink "
|
||||
end
|
||||
|
||||
def base_mirror_cmd( opts )
|
||||
"#{@bin} mirror "\
|
||||
"#{opts} "\
|
||||
"#{@debug}"
|
||||
end
|
||||
|
||||
def mirror_cmd(dest_ip, dest_port)
|
||||
base_mirror_cmd( base_mirror_opts( dest_ip, dest_port ) )
|
||||
end
|
||||
|
||||
def mirror_unlink_cmd( dest_ip, dest_port )
|
||||
base_mirror_cmd( unlink_mirror_opts( dest_ip, dest_port ) )
|
||||
end
|
||||
|
||||
def break_cmd
|
||||
"#{@bin} break "\
|
||||
"--sock #{ctrl} "\
|
||||
"#{@debug}"
|
||||
end
|
||||
|
||||
def status_cmd
|
||||
"#{@bin} status "\
|
||||
@@ -291,34 +325,69 @@ class FlexNBD
|
||||
debug( cmd )
|
||||
|
||||
@pid = @executor.run( cmd )
|
||||
start_wait_thread( @pid )
|
||||
|
||||
while !File.socket?(ctrl)
|
||||
pid, status = Process.wait2(@pid, Process::WNOHANG)
|
||||
raise "server did not start (#{cmd})" if pid
|
||||
sleep 0.1
|
||||
end
|
||||
|
||||
|
||||
start_wait_thread( @pid )
|
||||
at_exit { kill }
|
||||
end
|
||||
private :run_serve_cmd
|
||||
|
||||
|
||||
def serve( file, *acl)
|
||||
run_serve_cmd( serve_cmd( file, acl ) )
|
||||
cmd = serve_cmd( file, acl )
|
||||
run_serve_cmd( cmd )
|
||||
sleep( 0.2 ) until File.exists?( ctrl )
|
||||
end
|
||||
|
||||
|
||||
def listen(file, *acl)
|
||||
run_serve_cmd( listen_cmd( file, acl ) )
|
||||
end
|
||||
|
||||
def tcp_server_open?
|
||||
# raises if the other side doesn't accept()
|
||||
sock = TCPSocket.new(ip, port) rescue nil
|
||||
|
||||
success = !!sock
|
||||
( sock.close rescue nil) if sock
|
||||
success
|
||||
end
|
||||
|
||||
def proxy( connect_ip, connect_port )
|
||||
cmd = proxy_cmd( connect_ip, connect_port )
|
||||
debug( cmd )
|
||||
|
||||
@pid = @executor.run( cmd )
|
||||
|
||||
until tcp_server_open?
|
||||
pid, status = Process.wait2(@pid, Process::WNOHANG)
|
||||
raise "server did not start (#{cmd})" if pid
|
||||
sleep 0.1
|
||||
end
|
||||
|
||||
start_wait_thread( @pid )
|
||||
at_exit { kill }
|
||||
end
|
||||
|
||||
|
||||
def start_wait_thread( pid )
|
||||
@wait_thread = Thread.start do
|
||||
_, status = Process.waitpid2( pid )
|
||||
|
||||
if @kill
|
||||
fail "flexnbd quit with a bad status: #{status.exitstatus}" unless
|
||||
if status.signaled?
|
||||
fail "flexnbd quit with a bad signal: #{status.inspect}" unless
|
||||
@kill.include? status.termsig
|
||||
else
|
||||
fail "flexnbd quit with a bad status: #{status.inspect}" unless
|
||||
@kill.include? status.exitstatus
|
||||
end
|
||||
else
|
||||
$stderr.puts "flexnbd #{self.pid} quit"
|
||||
fail "flexnbd #{self.pid} quit early with status #{status.to_i}"
|
||||
@@ -383,14 +452,28 @@ class FlexNBD
|
||||
end
|
||||
|
||||
|
||||
def mirror_unlink( dest_ip, dest_port, timeout=nil )
|
||||
cmd = mirror_unlink_cmd( dest_ip, dest_port )
|
||||
debug( cmd )
|
||||
|
||||
maybe_timeout( cmd, timeout )
|
||||
end
|
||||
|
||||
|
||||
def maybe_timeout(cmd, timeout=nil )
|
||||
stdout, stderr = "",""
|
||||
stat = nil
|
||||
run = Proc.new do
|
||||
Open3.popen3( cmd ) do |io_in, io_out, io_err|
|
||||
# Ruby 1.9 changed the popen3 api. instead of 3 args, the block
|
||||
# gets 4. Not only that, but it no longer sets $?, so we have to
|
||||
# go elsewhere for the process' exit status.
|
||||
Open3.popen3( cmd ) do |io_in, io_out, io_err, maybe_thr|
|
||||
io_in.close
|
||||
stdout.replace io_out.read
|
||||
stderr.replace io_err.read
|
||||
stat = maybe_thr.value if maybe_thr
|
||||
end
|
||||
stat ||= $?
|
||||
end
|
||||
|
||||
if timeout
|
||||
@@ -399,18 +482,27 @@ class FlexNBD
|
||||
run.call
|
||||
end
|
||||
|
||||
[stdout, stderr]
|
||||
[stdout, stderr, stat]
|
||||
end
|
||||
|
||||
|
||||
def mirror(dest_ip, dest_port, bandwidth=nil, action=nil)
|
||||
stdout, stderr = mirror_unchecked( dest_ip, dest_port, bandwidth, action )
|
||||
raise IOError.new( "Migrate command failed\n" + stderr) unless $?.success?
|
||||
stdout, stderr, status = mirror_unchecked( dest_ip, dest_port, bandwidth, action )
|
||||
raise IOError.new( "Migrate command failed\n" + stderr) unless status.success?
|
||||
|
||||
stdout
|
||||
end
|
||||
|
||||
|
||||
|
||||
def break(timeout=nil)
|
||||
cmd = break_cmd
|
||||
debug( cmd )
|
||||
|
||||
maybe_timeout( cmd, timeout )
|
||||
end
|
||||
|
||||
|
||||
def acl(*acl)
|
||||
cmd = acl_cmd( *acl )
|
||||
debug( cmd )
|
||||
@@ -434,6 +526,14 @@ class FlexNBD
|
||||
end
|
||||
|
||||
|
||||
def paused
|
||||
Process.kill( "STOP", @pid )
|
||||
yield
|
||||
ensure
|
||||
Process.kill( "CONT", @pid )
|
||||
end
|
||||
|
||||
|
||||
protected
|
||||
def control_command(*args)
|
||||
raise "Server not running" unless @pid
|
||||
@@ -465,3 +565,5 @@ class FlexNBD
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
@@ -2,6 +2,14 @@
|
||||
|
||||
module FlexNBD
|
||||
|
||||
def self.binary( str )
|
||||
if str.respond_to? :force_encoding
|
||||
str.force_encoding "ASCII-8BIT"
|
||||
else
|
||||
str
|
||||
end
|
||||
end
|
||||
|
||||
# eeevil is his one and only name...
|
||||
def self.read_constants
|
||||
parents = []
|
||||
@@ -17,14 +25,14 @@ module FlexNBD
|
||||
|
||||
fail "No source root!" unless source_root
|
||||
|
||||
headers = Dir[File.join( source_root, "src", "*.h" ) ]
|
||||
headers = Dir[File.join( source_root, "src", "{common,proxy,server}","*.h" ) ]
|
||||
|
||||
headers.each do |header_filename|
|
||||
txt_lines = File.readlines( header_filename )
|
||||
txt_lines.each do |line|
|
||||
if line =~ /^#\s*define\s+([A-Z0-9_]+)\s+(\d+)\s*$/
|
||||
# Bodge until I can figure out what to do with #ifdefs
|
||||
const_set($1, $2.to_i) unless constants.include?( $1 )
|
||||
const_set($1, $2.to_i) unless const_defined?( $1 )
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -32,6 +40,9 @@ module FlexNBD
|
||||
end
|
||||
|
||||
read_constants()
|
||||
|
||||
REQUEST_MAGIC = binary("\x25\x60\x95\x13") unless defined?(REQUEST_MAGIC)
|
||||
REPLY_MAGIC = binary("\x67\x44\x66\x98") unless defined?(REPLY_MAGIC)
|
||||
|
||||
end # module FlexNBD
|
||||
|
||||
|
||||
|
@@ -56,8 +56,6 @@ module FlexNBD
|
||||
}
|
||||
end
|
||||
|
||||
REPLY_MAGIC="\x67\x44\x66\x98"
|
||||
|
||||
def write_error( handle )
|
||||
write_reply( handle, 1 )
|
||||
end
|
||||
@@ -76,7 +74,7 @@ module FlexNBD
|
||||
if opts[:magic] == :wrong
|
||||
write_rand( @sock, 4 )
|
||||
else
|
||||
@sock.write( REPLY_MAGIC )
|
||||
@sock.write( ::FlexNBD::REPLY_MAGIC )
|
||||
end
|
||||
|
||||
@sock.write( [err].pack("N") )
|
||||
@@ -93,6 +91,10 @@ module FlexNBD
|
||||
@sock.read( len )
|
||||
end
|
||||
|
||||
def write_data( len )
|
||||
@sock.write( len )
|
||||
end
|
||||
|
||||
|
||||
def self.parse_be64(str)
|
||||
raise "String is the wrong length: 8 bytes expected (#{str.length} received)" unless
|
||||
@@ -136,7 +138,7 @@ module FlexNBD
|
||||
end
|
||||
|
||||
|
||||
def accept( err_msg = "Timed out waiting for a connection", timeout = 2)
|
||||
def accept( err_msg = "Timed out waiting for a connection", timeout = 5)
|
||||
client_sock = nil
|
||||
|
||||
begin
|
||||
@@ -161,3 +163,4 @@ module FlexNBD
|
||||
|
||||
end # module FakeDest
|
||||
end # module FlexNBD
|
||||
|
||||
|
@@ -9,11 +9,17 @@ module FlexNBD
|
||||
|
||||
def initialize( addr, port, err_msg, source_addr=nil, source_port=0 )
|
||||
timing_out( 2, err_msg ) do
|
||||
begin
|
||||
@sock = if source_addr
|
||||
TCPSocket.new( addr, port, source_addr, source_port )
|
||||
else
|
||||
TCPSocket.new( addr, port )
|
||||
end
|
||||
rescue Errno::ECONNREFUSED
|
||||
$stderr.puts "Connection refused, retrying"
|
||||
sleep(0.2)
|
||||
retry
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -24,7 +30,7 @@ module FlexNBD
|
||||
|
||||
|
||||
def read_hello()
|
||||
timing_out( FlexNBD::MS_HELLO_TIME_SECS,
|
||||
timing_out( ::FlexNBD::MS_HELLO_TIME_SECS,
|
||||
"Timed out waiting for hello." ) do
|
||||
fail "No hello." unless (hello = @sock.read( 152 )) &&
|
||||
hello.length==152
|
||||
@@ -41,14 +47,13 @@ module FlexNBD
|
||||
end
|
||||
|
||||
|
||||
def send_request( type, handle="myhandle", from=0, len=0 )
|
||||
def send_request( type, handle="myhandle", from=0, len=0, magic=REQUEST_MAGIC )
|
||||
fail "Bad handle" unless handle.length == 8
|
||||
|
||||
@sock.write( "\x25\x60\x95\x13" )
|
||||
@sock.write( magic )
|
||||
@sock.write( [type].pack( 'N' ) )
|
||||
@sock.write( handle )
|
||||
@sock.write( "\x0"*4 )
|
||||
@sock.write( [from].pack( 'N' ) )
|
||||
@sock.write( [n64( from )].pack( 'q' ) )
|
||||
@sock.write( [len].pack( 'N' ) )
|
||||
end
|
||||
|
||||
@@ -90,16 +95,19 @@ module FlexNBD
|
||||
|
||||
def send_mirror
|
||||
read_hello()
|
||||
write_write_request( 0, 8 )
|
||||
write_data( "12345678" )
|
||||
read_response()
|
||||
write_entrust_request()
|
||||
write( 0, "12345678" )
|
||||
read_response()
|
||||
write_disconnect_request()
|
||||
close()
|
||||
end
|
||||
|
||||
|
||||
def write( from, data )
|
||||
write_write_request( from, data.length )
|
||||
write_data( data )
|
||||
end
|
||||
|
||||
|
||||
def read_response
|
||||
magic = @sock.read(4)
|
||||
error_s = @sock.read(4)
|
||||
@@ -113,10 +121,10 @@ module FlexNBD
|
||||
end
|
||||
|
||||
|
||||
def ensure_disconnected
|
||||
Timeout.timeout( 2 ) do
|
||||
@sock.read(1)
|
||||
end
|
||||
def disconnected?
|
||||
result = nil
|
||||
Timeout.timeout( 2 ) { result = ( @sock.read(1) == nil ) }
|
||||
result
|
||||
end
|
||||
|
||||
|
||||
@@ -131,6 +139,22 @@ module FlexNBD
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# take a 64-bit number, turn it upside down (due to :
|
||||
# http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/11920
|
||||
# )
|
||||
def n64(b)
|
||||
((b & 0xff00000000000000) >> 56) |
|
||||
((b & 0x00ff000000000000) >> 40) |
|
||||
((b & 0x0000ff0000000000) >> 24) |
|
||||
((b & 0x000000ff00000000) >> 8) |
|
||||
((b & 0x00000000ff000000) << 8) |
|
||||
((b & 0x0000000000ff0000) << 24) |
|
||||
((b & 0x000000000000ff00) << 40) |
|
||||
((b & 0x00000000000000ff) << 56)
|
||||
end
|
||||
|
||||
end # class FakeSource
|
||||
end # module FlexNBD
|
||||
|
||||
|
194
tests/acceptance/proxy_tests.rb
Normal file
194
tests/acceptance/proxy_tests.rb
Normal file
@@ -0,0 +1,194 @@
|
||||
# encoding: utf-8
|
||||
require 'flexnbd/fake_source'
|
||||
require 'flexnbd/fake_dest'
|
||||
|
||||
module ProxyTests
|
||||
def b
|
||||
"\xFF".b
|
||||
end
|
||||
|
||||
def with_proxied_client( override_size = nil )
|
||||
@env.serve1 unless @server_up
|
||||
@env.proxy2 unless @proxy_up
|
||||
@env.nbd2.can_die(0)
|
||||
client = FlexNBD::FakeSource.new(@env.ip, @env.port2, "Couldn't connect to proxy")
|
||||
begin
|
||||
|
||||
result = client.read_hello
|
||||
assert_equal "NBDMAGIC", result[:magic]
|
||||
assert_equal override_size || @env.file1.size, result[:size]
|
||||
|
||||
yield client
|
||||
ensure
|
||||
client.close rescue nil
|
||||
end
|
||||
end
|
||||
|
||||
def test_exits_with_error_when_cannot_connect_to_upstream_on_start
|
||||
assert_raises(RuntimeError) { @env.proxy1 }
|
||||
end
|
||||
|
||||
def test_read_requests_successfully_proxied
|
||||
with_proxied_client do |client|
|
||||
(0..3).each do |n|
|
||||
offset = n * 4096
|
||||
client.write_read_request(offset, 4096, "myhandle")
|
||||
rsp = client.read_response
|
||||
|
||||
assert_equal ::FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal "myhandle", rsp[:handle]
|
||||
assert_equal 0, rsp[:error]
|
||||
|
||||
orig_data = @env.file1.read(offset, 4096)
|
||||
data = client.read_raw(4096)
|
||||
|
||||
assert_equal 4096, orig_data.size
|
||||
assert_equal 4096, data.size
|
||||
|
||||
assert_equal( orig_data, data,
|
||||
"Returned data does not match on request #{n+1}" )
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def test_write_requests_successfully_proxied
|
||||
with_proxied_client do |client|
|
||||
(0..3).each do |n|
|
||||
offset = n * 4096
|
||||
client.write(offset, b * 4096)
|
||||
rsp = client.read_response
|
||||
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal "myhandle", rsp[:handle]
|
||||
assert_equal 0, rsp[:error]
|
||||
|
||||
data = @env.file1.read(offset, 4096)
|
||||
|
||||
assert_equal( ( b * 4096 ), data, "Data not written correctly (offset is #{n})" )
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def make_fake_server
|
||||
server = FlexNBD::FakeDest.new(@env.ip, @env.port1)
|
||||
@server_up = true
|
||||
|
||||
# We return a thread here because accept() and connect() both block for us
|
||||
Thread.new do
|
||||
sc = server.accept # just tell the supervisor we're up
|
||||
sc.write_hello
|
||||
|
||||
[ server, sc ]
|
||||
end
|
||||
end
|
||||
|
||||
def test_read_request_retried_when_upstream_dies_partway
|
||||
maker = make_fake_server
|
||||
|
||||
with_proxied_client(4096) do |client|
|
||||
server, sc1 = maker.value
|
||||
|
||||
# Send the read request to the proxy
|
||||
client.write_read_request( 0, 4096 )
|
||||
|
||||
# ensure we're given the read request
|
||||
req1 = sc1.read_request
|
||||
assert_equal ::FlexNBD::REQUEST_MAGIC, req1[:magic]
|
||||
assert_equal ::FlexNBD::REQUEST_READ, req1[:type]
|
||||
assert_equal 0, req1[:from]
|
||||
assert_not_equal 0, req1[:len]
|
||||
|
||||
# Kill the server again, now we're sure the read request has been sent once
|
||||
sc1.close
|
||||
|
||||
# We expect the proxy to reconnect without our client doing anything.
|
||||
sc2 = server.accept
|
||||
sc2.write_hello
|
||||
|
||||
# And once reconnected, it should resend an identical request.
|
||||
req2 = sc2.read_request
|
||||
assert_equal req1, req2
|
||||
|
||||
# The reply should be proxied back to the client.
|
||||
sc2.write_reply( req2[:handle] )
|
||||
sc2.write_data( b * 4096 )
|
||||
|
||||
# Check it to make sure it's correct
|
||||
rsp = timeout(15) { client.read_response }
|
||||
assert_equal ::FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
assert_equal req1[:handle], rsp[:handle]
|
||||
|
||||
data = client.read_raw( 4096 )
|
||||
assert_equal( (b * 4096), data, "Wrong data returned" )
|
||||
|
||||
sc2.close
|
||||
server.close
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
def test_write_request_retried_when_upstream_dies_partway
|
||||
maker = make_fake_server
|
||||
|
||||
with_proxied_client(4096) do |client|
|
||||
server, sc1 = maker.value
|
||||
|
||||
# Send the read request to the proxy
|
||||
client.write( 0, ( b * 4096 ) )
|
||||
|
||||
# ensure we're given the read request
|
||||
req1 = sc1.read_request
|
||||
assert_equal ::FlexNBD::REQUEST_MAGIC, req1[:magic]
|
||||
assert_equal ::FlexNBD::REQUEST_WRITE, req1[:type]
|
||||
assert_equal 0, req1[:from]
|
||||
assert_equal 4096, req1[:len]
|
||||
data1 = sc1.read_data( 4096 )
|
||||
assert_equal( ( b * 4096 ), data1, "Data not proxied successfully" )
|
||||
|
||||
# Kill the server again, now we're sure the read request has been sent once
|
||||
sc1.close
|
||||
|
||||
# We expect the proxy to reconnect without our client doing anything.
|
||||
sc2 = server.accept
|
||||
sc2.write_hello
|
||||
|
||||
# And once reconnected, it should resend an identical request.
|
||||
req2 = sc2.read_request
|
||||
assert_equal req1, req2
|
||||
data2 = sc2.read_data( 4096 )
|
||||
assert_equal data1, data2
|
||||
|
||||
# The reply should be proxied back to the client.
|
||||
sc2.write_reply( req2[:handle] )
|
||||
|
||||
# Check it to make sure it's correct
|
||||
rsp = timeout(15) { client.read_response }
|
||||
assert_equal ::FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
assert_equal req1[:handle], rsp[:handle]
|
||||
|
||||
sc2.close
|
||||
server.close
|
||||
end
|
||||
end
|
||||
|
||||
def test_only_one_client_can_connect_to_proxy_at_a_time
|
||||
with_proxied_client do |client|
|
||||
|
||||
c2 = nil
|
||||
assert_raises(Timeout::Error) do
|
||||
timeout(1) do
|
||||
c2 = FlexNBD::FakeSource.new(@env.ip, @env.port2, "Couldn't connect to proxy (2)")
|
||||
c2.read_hello
|
||||
end
|
||||
end
|
||||
c2.close rescue nil if c2
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
@@ -21,12 +21,18 @@ class TestDestErrorHandling < Test::Unit::TestCase
|
||||
assert_no_control
|
||||
end
|
||||
|
||||
|
||||
=begin
|
||||
# This is disabled while CLIENT_MAX_WAIT_SECS is removed
|
||||
def test_hello_goes_astray_causes_timeout_error
|
||||
run_fake( "source/hang_after_hello" )
|
||||
assert_no_control
|
||||
end
|
||||
=end
|
||||
|
||||
def test_sigterm_has_bad_exit_status
|
||||
@env.nbd1.can_die(1)
|
||||
run_fake( "source/sigterm_after_hello" )
|
||||
end
|
||||
|
||||
def test_disconnect_after_hello_causes_error_not_fatal
|
||||
run_fake( "source/close_after_hello" )
|
||||
@@ -58,10 +64,6 @@ class TestDestErrorHandling < Test::Unit::TestCase
|
||||
run_fake( "source/close_after_write" )
|
||||
end
|
||||
|
||||
def test_disconnect_before_entrust_reply_causes_error
|
||||
run_fake( "source/close_after_entrust" )
|
||||
end
|
||||
|
||||
|
||||
def test_disconnect_before_write_reply_causes_error
|
||||
# Note that this is an odd case: writing the reply doesn't fail.
|
||||
@@ -71,23 +73,16 @@ class TestDestErrorHandling < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
|
||||
def test_disconnect_after_entrust_reply_causes_error
|
||||
|
||||
def test_straight_migration
|
||||
@env.nbd1.can_die(0)
|
||||
# This fake runs a failed migration then a succeeding one, so we
|
||||
# expect the destination to take control.
|
||||
run_fake( "source/close_after_entrust_reply" )
|
||||
assert_control
|
||||
end
|
||||
|
||||
|
||||
def test_cant_rebind_retries
|
||||
run_fake( "source/successful_transfer" )
|
||||
end
|
||||
|
||||
|
||||
private
|
||||
def run_fake( name )
|
||||
@env.run_fake( name, @env.ip, @env.port1, @env.ip, @env.rebind_port1 )
|
||||
@env.run_fake( name, @env.ip, @env.port1 )
|
||||
assert @env.fake_reports_success, "#{name} failed."
|
||||
end
|
||||
|
||||
@@ -105,3 +100,4 @@ class TestDestErrorHandling < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
end # class TestDestErrorHandling
|
||||
|
||||
|
@@ -2,12 +2,17 @@
|
||||
|
||||
require 'test/unit'
|
||||
require 'environment'
|
||||
require 'flexnbd/constants'
|
||||
|
||||
class TestHappyPath < Test::Unit::TestCase
|
||||
def setup
|
||||
@env = Environment.new
|
||||
end
|
||||
|
||||
def bin(str)
|
||||
FlexNBD.binary str
|
||||
end
|
||||
|
||||
def teardown
|
||||
@env.nbd1.can_die(0)
|
||||
@env.nbd2.can_die(0)
|
||||
@@ -22,13 +27,13 @@ class TestHappyPath < Test::Unit::TestCase
|
||||
[0, 12, 63].each do |num|
|
||||
|
||||
assert_equal(
|
||||
@env.nbd1.read(num*@env.blocksize, @env.blocksize),
|
||||
@env.file1.read(num*@env.blocksize, @env.blocksize)
|
||||
bin( @env.nbd1.read(num*@env.blocksize, @env.blocksize) ),
|
||||
bin( @env.file1.read(num*@env.blocksize, @env.blocksize) )
|
||||
)
|
||||
end
|
||||
|
||||
[124, 1200, 10028, 25488].each do |num|
|
||||
assert_equal(@env.nbd1.read(num, 4), @env.file1.read(num, 4))
|
||||
assert_equal(bin(@env.nbd1.read(num, 4)), bin(@env.file1.read(num, 4)))
|
||||
end
|
||||
end
|
||||
|
||||
@@ -64,25 +69,57 @@ class TestHappyPath < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
|
||||
def test_mirror
|
||||
def setup_to_mirror
|
||||
@env.writefile1( "f"*4 )
|
||||
@env.serve1
|
||||
|
||||
@env.writefile2( "0"*4 )
|
||||
@env.listen2
|
||||
|
||||
@env.nbd1.can_die
|
||||
stdout, stderr = @env.mirror12
|
||||
|
||||
@env.nbd1.join
|
||||
|
||||
assert_equal(@env.file1.read_original( 0, @env.blocksize ),
|
||||
@env.file2.read( 0, @env.blocksize ) )
|
||||
assert @env.status2['has_control'], "destination didn't take control"
|
||||
end
|
||||
|
||||
|
||||
def test_mirror
|
||||
@env.nbd1.can_die
|
||||
@env.nbd2.can_die(0)
|
||||
setup_to_mirror()
|
||||
|
||||
stdout, stderr = @env.mirror12
|
||||
|
||||
@env.nbd1.join
|
||||
@env.nbd2.join
|
||||
|
||||
assert( File.file?( @env.filename1 ),
|
||||
"The source file was incorrectly deleted")
|
||||
assert_equal(@env.file1.read_original( 0, @env.blocksize ),
|
||||
@env.file2.read( 0, @env.blocksize ) )
|
||||
end
|
||||
|
||||
|
||||
def test_mirror_unlink
|
||||
@env.nbd1.can_die(0)
|
||||
@env.nbd2.can_die(0)
|
||||
setup_to_mirror()
|
||||
|
||||
assert File.file?( @env.filename1 )
|
||||
|
||||
stdout, stderr = @env.mirror12_unlink
|
||||
|
||||
assert_no_match( /unrecognized/, stderr )
|
||||
|
||||
|
||||
Timeout.timeout(10) do @env.nbd1.join end
|
||||
|
||||
assert !File.file?( @env.filename1 )
|
||||
end
|
||||
|
||||
|
||||
|
||||
def test_write_to_high_block
|
||||
#
|
||||
# This test does not work on 32 bit platforms.
|
||||
#
|
||||
skip("Not relevant on 32-bit platforms") if ( ["a"].pack("p").size < 8 )
|
||||
|
||||
# Create a large file, then try to write to somewhere after the 2G boundary
|
||||
@env.truncate1 "4G"
|
||||
@env.serve1
|
||||
@@ -92,4 +129,41 @@ class TestHappyPath < Test::Unit::TestCase
|
||||
assert_equal "12345678", @env.nbd1.read( 2**31+2**29, 8 )
|
||||
end
|
||||
|
||||
|
||||
def test_set_acl
|
||||
# Just check that we get sane feedback here
|
||||
@env.writefile1( "f"*4 )
|
||||
@env.serve1
|
||||
|
||||
_,stderr = @env.acl1("127.0.0.1")
|
||||
assert_no_match( /^(F|E):/, stderr )
|
||||
end
|
||||
|
||||
|
||||
def test_write_more_than_one_run
|
||||
one_mb = 2**20
|
||||
data = "\0" * 256 * one_mb
|
||||
|
||||
File.open(@env.filename1, "wb") do |f| f.write( "1" * 256 * one_mb ) end
|
||||
|
||||
@env.serve1
|
||||
sleep 5
|
||||
@env.write1( data )
|
||||
@env.nbd1.can_die(0)
|
||||
@env.nbd1.kill
|
||||
|
||||
i = 0
|
||||
File.open(@env.filename1, "rb") do |f|
|
||||
while mb = f.read( one_mb )
|
||||
unless "\0"*one_mb == mb
|
||||
msg = "Read non-zeros after offset %x:\n"%(i * one_mb)
|
||||
msg += `hexdump #{@env.filename1} | head -n5`
|
||||
fail msg
|
||||
end
|
||||
i += 1
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
22
tests/acceptance/test_prefetch_proxy_mode.rb
Normal file
22
tests/acceptance/test_prefetch_proxy_mode.rb
Normal file
@@ -0,0 +1,22 @@
|
||||
require 'test/unit'
|
||||
require 'environment'
|
||||
require 'proxy_tests'
|
||||
|
||||
|
||||
class TestPrefetchProxyMode < Test::Unit::TestCase
|
||||
include ProxyTests
|
||||
|
||||
def setup
|
||||
super
|
||||
@env = Environment.new
|
||||
@env.prefetch_proxy!
|
||||
@env.writefile1( "f" * 16 )
|
||||
end
|
||||
|
||||
def teardown
|
||||
@env.cleanup
|
||||
super
|
||||
end
|
||||
end
|
||||
|
||||
|
20
tests/acceptance/test_proxy_mode.rb
Normal file
20
tests/acceptance/test_proxy_mode.rb
Normal file
@@ -0,0 +1,20 @@
|
||||
require 'test/unit'
|
||||
require 'environment'
|
||||
require 'proxy_tests'
|
||||
|
||||
|
||||
class TestProxyMode < Test::Unit::TestCase
|
||||
include ProxyTests
|
||||
|
||||
def setup
|
||||
super
|
||||
@env = Environment.new
|
||||
@env.writefile1( "f" * 16 )
|
||||
end
|
||||
|
||||
def teardown
|
||||
@env.cleanup
|
||||
super
|
||||
end
|
||||
end
|
||||
|
110
tests/acceptance/test_serve_mode.rb
Normal file
110
tests/acceptance/test_serve_mode.rb
Normal file
@@ -0,0 +1,110 @@
|
||||
require 'test/unit'
|
||||
require 'environment'
|
||||
require 'flexnbd/fake_source'
|
||||
|
||||
class TestServeMode < Test::Unit::TestCase
|
||||
|
||||
def setup
|
||||
super
|
||||
@b = "\xFF".b
|
||||
@env = Environment.new
|
||||
@env.writefile1( "0" )
|
||||
@env.serve1
|
||||
end
|
||||
|
||||
def teardown
|
||||
@env.cleanup
|
||||
super
|
||||
end
|
||||
|
||||
def connect_to_server
|
||||
client = FlexNBD::FakeSource.new(@env.ip, @env.port1, "Connecting to server failed")
|
||||
begin
|
||||
result = client.read_hello
|
||||
assert_equal "NBDMAGIC", result[:magic]
|
||||
assert_equal @env.file1.size, result[:size]
|
||||
yield client
|
||||
ensure
|
||||
client.close rescue nil
|
||||
end
|
||||
end
|
||||
|
||||
def test_bad_request_magic_receives_error_response
|
||||
connect_to_server do |client|
|
||||
|
||||
# replace REQUEST_MAGIC with all 0s to make it look bad
|
||||
client.send_request( 0, "myhandle", 0, 0, "\x00\x00\x00\x00" )
|
||||
rsp = client.read_response
|
||||
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal "myhandle", rsp[:handle]
|
||||
assert rsp[:error] != 0, "Server sent success reply back: #{rsp[:error]}"
|
||||
|
||||
# The client should be disconnected now
|
||||
assert client.disconnected?, "Server not disconnected"
|
||||
end
|
||||
end
|
||||
|
||||
def test_long_write_on_top_of_short_write_is_respected
|
||||
|
||||
connect_to_server do |client|
|
||||
# Start with a file of all-zeroes.
|
||||
client.write( 0, "\x00" * @env.file1.size )
|
||||
rsp = client.read_response
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
|
||||
client.write( 0, @b )
|
||||
rsp = client.read_response
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
|
||||
client.write( 0, @b * 2 )
|
||||
rsp = client.read_response
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal 0, rsp[:error]
|
||||
end
|
||||
|
||||
assert_equal @b * 2, @env.file1.read( 0, 2 )
|
||||
end
|
||||
|
||||
|
||||
def test_read_request_out_of_bounds_receives_error_response
|
||||
connect_to_server do |client|
|
||||
client.write_read_request( @env.file1.size, 4096 )
|
||||
rsp = client.read_response
|
||||
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal "myhandle", rsp[:handle]
|
||||
assert rsp[:error] != 0, "Server sent success reply back: #{rsp[:error]}"
|
||||
|
||||
# Ensure we're not disconnected by sending a request. We don't care about
|
||||
# whether the reply is good or not, here.
|
||||
client.write_read_request( 0, 4096 )
|
||||
rsp = client.read_response
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
end
|
||||
end
|
||||
|
||||
def test_write_request_out_of_bounds_receives_error_response
|
||||
connect_to_server do |client|
|
||||
client.write( @env.file1.size, "\x00" * 4096 )
|
||||
rsp = client.read_response
|
||||
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
assert_equal "myhandle", rsp[:handle]
|
||||
assert rsp[:error] != 0, "Server sent success reply back: #{rsp[:error]}"
|
||||
|
||||
# Ensure we're not disconnected by sending a request. We don't care about
|
||||
# whether the reply is good or not, here.
|
||||
client.write( 0, "\x00" * @env.file1.size )
|
||||
rsp = client.read_response
|
||||
assert_equal FlexNBD::REPLY_MAGIC, rsp[:magic]
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
end
|
||||
|
@@ -7,6 +7,9 @@ require 'environment'
|
||||
class TestSourceErrorHandling < Test::Unit::TestCase
|
||||
|
||||
def setup
|
||||
@old_env = ENV['FLEXNBD_MS_REQUEST_LIMIT_SECS']
|
||||
ENV['FLEXNBD_MS_REQUEST_LIMIT_SECS'] = "4.0"
|
||||
|
||||
@env = Environment.new
|
||||
@env.writefile1( "f" * 4 )
|
||||
@env.serve1
|
||||
@@ -16,15 +19,28 @@ class TestSourceErrorHandling < Test::Unit::TestCase
|
||||
def teardown
|
||||
@env.nbd1.can_die(0)
|
||||
@env.cleanup
|
||||
ENV['FLEXNBD_MS_REQUEST_LIMIT_SECS'] = @old_env
|
||||
end
|
||||
|
||||
|
||||
def expect_term_during_migration
|
||||
@env.nbd1.can_die(1,9)
|
||||
end
|
||||
|
||||
|
||||
def test_failure_to_connect_reported_in_mirror_cmd_response
|
||||
stdout, stderr = @env.mirror12_unchecked
|
||||
expect_term_during_migration
|
||||
assert_match( /failed to connect/, stderr )
|
||||
end
|
||||
|
||||
|
||||
def test_sigterm_after_hello_quits_with_status_of_1
|
||||
expect_term_during_migration
|
||||
run_fake( "dest/sigterm_after_hello" )
|
||||
end
|
||||
|
||||
|
||||
def test_destination_hangs_after_connect_reports_error_at_source
|
||||
run_fake( "dest/hang_after_connect",
|
||||
:err => /Remote server failed to respond/ )
|
||||
@@ -36,6 +52,7 @@ class TestSourceErrorHandling < Test::Unit::TestCase
|
||||
:err => /Mirror was rejected/ )
|
||||
end
|
||||
|
||||
|
||||
def test_wrong_size_causes_disconnect
|
||||
run_fake( "dest/hello_wrong_size",
|
||||
:err => /Remote size does not match local size/ )
|
||||
@@ -43,57 +60,57 @@ class TestSourceErrorHandling < Test::Unit::TestCase
|
||||
|
||||
|
||||
def test_wrong_magic_causes_disconnect
|
||||
expect_term_during_migration
|
||||
run_fake( "dest/hello_wrong_magic",
|
||||
:err => /Mirror was rejected/ )
|
||||
end
|
||||
|
||||
|
||||
def test_disconnect_after_hello_causes_retry
|
||||
expect_term_during_migration
|
||||
run_fake( "dest/close_after_hello",
|
||||
:out => /Mirror started/ )
|
||||
end
|
||||
|
||||
|
||||
def test_write_times_out_causes_retry
|
||||
expect_term_during_migration
|
||||
run_fake( "dest/hang_after_write" )
|
||||
end
|
||||
|
||||
|
||||
def test_rejected_write_causes_retry
|
||||
expect_term_during_migration
|
||||
run_fake( "dest/error_on_write" )
|
||||
end
|
||||
|
||||
|
||||
def test_disconnect_before_write_reply_causes_retry
|
||||
expect_term_during_migration
|
||||
run_fake( "dest/close_after_write" )
|
||||
end
|
||||
|
||||
|
||||
def test_bad_write_reply_causes_retry
|
||||
expect_term_during_migration
|
||||
run_fake( "dest/write_wrong_magic" )
|
||||
end
|
||||
|
||||
|
||||
def test_pre_entrust_disconnect_causes_retry
|
||||
expect_term_during_migration
|
||||
run_fake( "dest/close_after_writes" )
|
||||
end
|
||||
|
||||
|
||||
def test_post_entrust_disconnect_causes_retry
|
||||
@env.nbd1.can_die(0)
|
||||
run_fake( "dest/close_after_entrust" )
|
||||
def test_cancel_migration
|
||||
run_fake( "dest/break_after_hello" )
|
||||
end
|
||||
|
||||
|
||||
def test_entrust_error_causes_retry
|
||||
run_fake( "dest/error_on_entrust" )
|
||||
end
|
||||
|
||||
|
||||
|
||||
private
|
||||
def run_fake(name, opts = {})
|
||||
@env.run_fake( name, @env.ip, @env.port2 )
|
||||
@env.run_fake( name, @env.ip, @env.port2, @env.nbd1.ctrl )
|
||||
stdout, stderr = @env.mirror12_unchecked
|
||||
assert_success
|
||||
assert_match( opts[:err], stderr ) if opts[:err]
|
||||
|
171
tests/acceptance/test_write_during_migration.rb
Executable file
171
tests/acceptance/test_write_during_migration.rb
Executable file
@@ -0,0 +1,171 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require 'test/unit'
|
||||
require 'flexnbd/fake_source'
|
||||
require 'socket'
|
||||
require 'fileutils'
|
||||
require 'tmpdir'
|
||||
|
||||
Thread.abort_on_exception = true
|
||||
|
||||
class TestWriteDuringMigration < Test::Unit::TestCase
|
||||
|
||||
def setup
|
||||
@flexnbd = File.expand_path("../../build/flexnbd")
|
||||
|
||||
raise "No binary!" unless File.executable?( @flexnbd )
|
||||
|
||||
|
||||
@size = 20*1024*1024 # 20MB
|
||||
@write_data = "foo!" * 2048 # 8K write
|
||||
@source_port = 9990
|
||||
@dest_port = 9991
|
||||
@source_sock = "src.sock"
|
||||
@dest_sock = "dst.sock"
|
||||
@source_file = "src.file"
|
||||
@dest_file = "dst.file"
|
||||
end
|
||||
|
||||
|
||||
def teardown
|
||||
[@dst_proc, @src_proc].each do |pid|
|
||||
if pid
|
||||
Process.kill( "KILL", pid ) rescue nil
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def debug_arg
|
||||
ENV['DEBUG'] ? "--verbose" : ""
|
||||
end
|
||||
|
||||
|
||||
def launch_servers
|
||||
@dst_proc = fork() {
|
||||
cmd = "#{@flexnbd} listen -l 127.0.0.1 -p #{@dest_port} -f #{@dest_file} -s #{@dest_sock} #{debug_arg}"
|
||||
exec cmd
|
||||
}
|
||||
|
||||
@src_proc = fork() {
|
||||
cmd = "#{@flexnbd} serve -l 127.0.0.1 -p #{@source_port} -f #{@source_file} -s #{@source_sock} #{debug_arg}"
|
||||
exec cmd
|
||||
}
|
||||
begin
|
||||
awaiting = nil
|
||||
Timeout.timeout(10) do
|
||||
awaiting = :source
|
||||
sleep 0.1 while !File.exists?( @source_sock )
|
||||
awaiting = :dest
|
||||
sleep 0.1 while !File.exists?( @dest_sock )
|
||||
end
|
||||
rescue Timeout::Error
|
||||
case awaiting
|
||||
when :source
|
||||
fail "Couldn't get a source socket."
|
||||
when :dest
|
||||
fail "Couldn't get a destination socket."
|
||||
else
|
||||
fail "Something went wrong I don't understand."
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def make_files()
|
||||
FileUtils.touch(@source_file)
|
||||
File.truncate(@source_file, @size)
|
||||
FileUtils.touch(@dest_file)
|
||||
File.truncate(@dest_file, @size)
|
||||
|
||||
File.open(@source_file, "wb"){|f| f.write "a"*@size }
|
||||
end
|
||||
|
||||
|
||||
def start_mirror
|
||||
UNIXSocket.open(@source_sock) {|sock|
|
||||
sock.write(["mirror", "127.0.0.1", @dest_port.to_s, "exit"].join("\x0A") + "\x0A\x0A")
|
||||
sock.flush
|
||||
rsp = sock.readline
|
||||
}
|
||||
end
|
||||
|
||||
|
||||
def wait_for_quit()
|
||||
Timeout.timeout( 10 ) do
|
||||
start_time = Time.now
|
||||
dst_result = Process::waitpid2(@dst_proc)
|
||||
src_result = Process::waitpid2(@src_proc)
|
||||
end
|
||||
end
|
||||
|
||||
def source_writer
|
||||
client = FlexNBD::FakeSource.new( "127.0.0.1", @source_port, "Timed out connecting" )
|
||||
offsets = Range.new(0, (@size - @write_data.size) / 4096 ).to_a
|
||||
loop do
|
||||
begin
|
||||
client.write(offsets[rand(offsets.size)] * 4096, @write_data)
|
||||
rescue => err
|
||||
# We expect a broken write at some point, so ignore it
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def assert_both_sides_identical
|
||||
# puts `md5sum #{@source_file} #{@dest_file}`
|
||||
|
||||
# Ensure each block matches
|
||||
File.open(@source_file, "r") do |source|
|
||||
File.open(@dest_file, "r") do |dest|
|
||||
0.upto( @size / 4096 ) do |block_num|
|
||||
s_data = source.read( 4096 )
|
||||
d_data = dest.read( 4096 )
|
||||
|
||||
assert s_data == d_data, "Block #{block_num} mismatch!"
|
||||
|
||||
source.seek( 4096, IO::SEEK_CUR )
|
||||
dest.seek( 4096, IO::SEEK_CUR )
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def test_write_during_migration
|
||||
Dir.mktmpdir() do |tmpdir|
|
||||
Dir.chdir( tmpdir ) do
|
||||
make_files()
|
||||
|
||||
launch_servers()
|
||||
|
||||
src_writer = Thread.new { source_writer }
|
||||
|
||||
start_mirror()
|
||||
wait_for_quit()
|
||||
src_writer.join
|
||||
assert_both_sides_identical
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def test_many_clients_during_migration
|
||||
Dir.mktmpdir() do |tmpdir|
|
||||
Dir.chdir( tmpdir ) do
|
||||
make_files()
|
||||
|
||||
launch_servers()
|
||||
|
||||
src_writers_1 = (1..5).collect { Thread.new { source_writer } }
|
||||
|
||||
start_mirror()
|
||||
|
||||
src_writers_2 = (1..5).collect { Thread.new { source_writer } }
|
||||
|
||||
wait_for_quit()
|
||||
( src_writers_1 + src_writers_2 ).each {|t| t.join }
|
||||
assert_both_sides_identical
|
||||
end
|
||||
end end
|
||||
|
||||
|
||||
end
|
||||
|
@@ -29,7 +29,7 @@ end
|
||||
|
||||
@local = File.open(testname_local, "r+")
|
||||
|
||||
@serve = FlexNBD.new(binary, "127.0.0.1", 41234)
|
||||
@serve = FlexNBD::FlexNBD.new(binary, "127.0.0.1", 41234)
|
||||
@serve.serve(testname_serve)
|
||||
|
||||
$record = []
|
||||
|
@@ -2,10 +2,15 @@
|
||||
|
||||
#include "bitset.h"
|
||||
|
||||
#define assert_bitset_is( map, val ) {\
|
||||
uint64_t *num = (uint64_t*) map->bits; \
|
||||
ck_assert_int_eq( val, *num ); \
|
||||
}
|
||||
|
||||
START_TEST(test_bit_set)
|
||||
{
|
||||
uint64_t num = 0;
|
||||
char *bits = (char*) #
|
||||
bitfield_p bits = (bitfield_p) #
|
||||
|
||||
#define TEST_BIT_SET(bit, newvalue) \
|
||||
bit_set(bits, (bit)); \
|
||||
@@ -22,7 +27,7 @@ END_TEST
|
||||
START_TEST(test_bit_clear)
|
||||
{
|
||||
uint64_t num = 0xffffffffffffffff;
|
||||
char *bits = (char*) #
|
||||
bitfield_p bits = (bitfield_p) #
|
||||
|
||||
#define TEST_BIT_CLEAR(bit, newvalue) \
|
||||
bit_clear(bits, (bit)); \
|
||||
@@ -39,7 +44,7 @@ END_TEST
|
||||
START_TEST(test_bit_tests)
|
||||
{
|
||||
uint64_t num = 0x5555555555555555;
|
||||
char *bits = (char*) #
|
||||
bitfield_p bits = (bitfield_p) #
|
||||
|
||||
fail_unless(bit_has_value(bits, 0, 1), "bit_has_value malfunction");
|
||||
fail_unless(bit_has_value(bits, 1, 0), "bit_has_value malfunction");
|
||||
@@ -53,8 +58,8 @@ END_TEST
|
||||
|
||||
START_TEST(test_bit_ranges)
|
||||
{
|
||||
char buffer[4160];
|
||||
uint64_t *longs = (unsigned long*) buffer;
|
||||
bitfield_word_t buffer[BIT_WORDS_FOR_SIZE(4160)];
|
||||
uint64_t *longs = (uint64_t *) buffer;
|
||||
uint64_t i;
|
||||
|
||||
memset(buffer, 0, 4160);
|
||||
@@ -62,9 +67,9 @@ START_TEST(test_bit_ranges)
|
||||
for (i=0; i<64; i++) {
|
||||
bit_set_range(buffer, i*64, i);
|
||||
fail_unless(
|
||||
longs[i] == (1UL<<i)-1,
|
||||
longs[i] == (1ULL<<i)-1,
|
||||
"longs[%ld] = %lx SHOULD BE %lx",
|
||||
i, longs[i], (1L<<i)-1
|
||||
i, longs[i], (1ULL<<i)-1
|
||||
);
|
||||
|
||||
fail_unless(longs[i+1] == 0, "bit_set_range overshot at i=%d", i);
|
||||
@@ -79,7 +84,7 @@ END_TEST
|
||||
|
||||
START_TEST(test_bit_runs)
|
||||
{
|
||||
char buffer[256];
|
||||
bitfield_word_t buffer[BIT_WORDS_FOR_SIZE(256)];
|
||||
int i, ptr=0, runs[] = {
|
||||
56,97,22,12,83,1,45,80,85,51,64,40,63,67,75,64,94,81,79,62
|
||||
};
|
||||
@@ -95,7 +100,7 @@ START_TEST(test_bit_runs)
|
||||
ptr = 0;
|
||||
|
||||
for (i=0; i < 20; i += 1) {
|
||||
int run = bit_run_count(buffer, ptr, 2048-ptr);
|
||||
int run = bit_run_count(buffer, ptr, 2048-ptr, NULL);
|
||||
fail_unless(
|
||||
run == runs[i],
|
||||
"run %d should have been %d, was %d",
|
||||
@@ -108,7 +113,7 @@ END_TEST
|
||||
|
||||
START_TEST(test_bitset)
|
||||
{
|
||||
struct bitset_mapping* map;
|
||||
struct bitset * map;
|
||||
uint64_t *num;
|
||||
|
||||
map = bitset_alloc(6400, 100);
|
||||
@@ -143,23 +148,40 @@ END_TEST
|
||||
|
||||
START_TEST( test_bitset_set )
|
||||
{
|
||||
struct bitset_mapping* map;
|
||||
uint64_t *num;
|
||||
struct bitset * map;
|
||||
uint64_t run;
|
||||
|
||||
map = bitset_alloc(64, 1);
|
||||
num = (uint64_t*) map->bits;
|
||||
|
||||
ck_assert_int_eq( 0x0000000000000000, *num );
|
||||
assert_bitset_is( map, 0x0000000000000000 );
|
||||
bitset_set( map );
|
||||
ck_assert_int_eq( 0xffffffffffffffff, *num );
|
||||
assert_bitset_is( map, 0xffffffffffffffff );
|
||||
bitset_free( map );
|
||||
|
||||
map = bitset_alloc( 6400, 100 );
|
||||
assert_bitset_is( map, 0x0000000000000000 );
|
||||
bitset_set( map );
|
||||
assert_bitset_is( map, 0xffffffffffffffff );
|
||||
bitset_free( map );
|
||||
|
||||
// Now do something large and representative
|
||||
map = bitset_alloc( 53687091200, 4096 );
|
||||
bitset_set( map );
|
||||
|
||||
run = bitset_run_count( map, 0, 53687091200 );
|
||||
ck_assert_int_eq( run, 53687091200 );
|
||||
bitset_free( map );
|
||||
|
||||
|
||||
}
|
||||
END_TEST
|
||||
|
||||
|
||||
START_TEST( test_bitset_clear )
|
||||
{
|
||||
struct bitset_mapping* map;
|
||||
struct bitset * map;
|
||||
uint64_t *num;
|
||||
uint64_t run;
|
||||
|
||||
map = bitset_alloc(64, 1);
|
||||
num = (uint64_t*) map->bits;
|
||||
@@ -168,26 +190,300 @@ START_TEST( test_bitset_clear )
|
||||
bitset_set( map );
|
||||
bitset_clear( map );
|
||||
ck_assert_int_eq( 0x0000000000000000, *num );
|
||||
|
||||
bitset_free( map );
|
||||
|
||||
// Now do something large and representative
|
||||
map = bitset_alloc( 53687091200, 4096 );
|
||||
bitset_set( map );
|
||||
bitset_clear( map );
|
||||
run = bitset_run_count( map, 0, 53687091200 );
|
||||
ck_assert_int_eq( run, 53687091200 );
|
||||
bitset_free( map );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST( test_bitset_set_range )
|
||||
{
|
||||
struct bitset* map = bitset_alloc( 64, 1 );
|
||||
assert_bitset_is( map, 0x0000000000000000 );
|
||||
|
||||
bitset_set_range( map, 8, 8 );
|
||||
assert_bitset_is( map, 0x000000000000ff00 );
|
||||
|
||||
bitset_clear( map );
|
||||
assert_bitset_is( map, 0x0000000000000000 );
|
||||
bitset_set_range( map, 0, 0 );
|
||||
assert_bitset_is( map, 0x0000000000000000 );
|
||||
|
||||
bitset_free( map );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST( test_bitset_clear_range )
|
||||
{
|
||||
struct bitset* map = bitset_alloc( 64, 1 );
|
||||
bitset_set( map );
|
||||
assert_bitset_is( map, 0xffffffffffffffff );
|
||||
|
||||
bitset_clear_range( map, 8, 8 );
|
||||
assert_bitset_is( map, 0xffffffffffff00ff );
|
||||
|
||||
bitset_set( map );
|
||||
assert_bitset_is( map, 0xffffffffffffffff );
|
||||
bitset_clear_range( map, 0, 0 );
|
||||
assert_bitset_is( map, 0xffffffffffffffff );
|
||||
|
||||
bitset_free( map );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST( test_bitset_run_count )
|
||||
{
|
||||
struct bitset* map = bitset_alloc( 64, 1 );
|
||||
uint64_t run;
|
||||
|
||||
assert_bitset_is( map, 0x0000000000000000 );
|
||||
|
||||
run = bitset_run_count( map, 0, 64 );
|
||||
ck_assert_int_eq( 64, run );
|
||||
|
||||
bitset_set_range( map, 0, 32 );
|
||||
assert_bitset_is( map, 0x00000000ffffffff );
|
||||
|
||||
run = bitset_run_count( map, 0, 64 );
|
||||
ck_assert_int_eq( 32, run );
|
||||
|
||||
run = bitset_run_count( map, 0, 16 );
|
||||
ck_assert_int_eq( 16, run );
|
||||
|
||||
run = bitset_run_count( map, 16, 64 );
|
||||
ck_assert_int_eq( 16, run );
|
||||
|
||||
run = bitset_run_count( map, 31, 64 );
|
||||
ck_assert_int_eq( 1, run );
|
||||
|
||||
run = bitset_run_count( map, 32, 64 );
|
||||
ck_assert_int_eq( 32, run );
|
||||
|
||||
run = bitset_run_count( map, 32, 32 );
|
||||
ck_assert_int_eq( 32, run );
|
||||
|
||||
run = bitset_run_count( map, 32, 16 );
|
||||
ck_assert_int_eq( 16, run );
|
||||
|
||||
bitset_free( map );
|
||||
|
||||
map = bitset_alloc( 6400, 100 );
|
||||
assert_bitset_is( map, 0x0000000000000000 );
|
||||
|
||||
run = bitset_run_count( map, 0, 6400 );
|
||||
ck_assert_int_eq( 6400, run );
|
||||
|
||||
bitset_set_range( map, 0, 3200 );
|
||||
|
||||
run = bitset_run_count( map, 0, 6400 );
|
||||
ck_assert_int_eq( 3200, run );
|
||||
|
||||
run = bitset_run_count( map, 1, 6400 );
|
||||
ck_assert_int_eq( 3199, run );
|
||||
|
||||
run = bitset_run_count( map, 3200, 6400 );
|
||||
ck_assert_int_eq( 3200, run );
|
||||
|
||||
run = bitset_run_count( map, 6500, 6400 );
|
||||
ck_assert_int_eq( 0, run );
|
||||
bitset_free( map );
|
||||
|
||||
// Now do something large and representative
|
||||
map = bitset_alloc( 53687091200, 4096 );
|
||||
bitset_set( map );
|
||||
run = bitset_run_count( map, 0, 53687091200 );
|
||||
ck_assert_int_eq( run, 53687091200 );
|
||||
|
||||
bitset_free( map );
|
||||
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST( test_bitset_set_range_doesnt_push_to_stream )
|
||||
{
|
||||
struct bitset *map = bitset_alloc( 64, 1 );
|
||||
bitset_set_range( map, 0, 64 );
|
||||
ck_assert_int_eq( 0, bitset_stream_size( map ) );
|
||||
bitset_free( map );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST( test_bitset_clear_range_doesnt_push_to_stream )
|
||||
{
|
||||
struct bitset *map = bitset_alloc( 64, 1 );
|
||||
bitset_clear_range( map, 0, 64 );
|
||||
ck_assert_int_eq( 0, bitset_stream_size( map ) );
|
||||
bitset_free( map );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_bitset_enable_stream)
|
||||
{
|
||||
struct bitset *map = bitset_alloc( 64, 1 );
|
||||
struct bitset_stream_entry result;
|
||||
memset( &result, 0, sizeof( result ) );
|
||||
|
||||
bitset_enable_stream( map );
|
||||
|
||||
ck_assert_int_eq( 1, map->stream_enabled );
|
||||
|
||||
bitset_stream_dequeue( map, &result );
|
||||
|
||||
ck_assert_int_eq( BITSET_STREAM_ON, result.event );
|
||||
ck_assert_int_eq( 0, result.from );
|
||||
ck_assert_int_eq( 64, result.len );
|
||||
|
||||
bitset_free( map );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_bitset_disable_stream)
|
||||
{
|
||||
struct bitset *map = bitset_alloc( 64, 1 );
|
||||
struct bitset_stream_entry result;
|
||||
memset( &result, 0, sizeof( result ) );
|
||||
|
||||
bitset_enable_stream( map );
|
||||
bitset_disable_stream( map );
|
||||
|
||||
ck_assert_int_eq( 0, map->stream_enabled );
|
||||
ck_assert_int_eq( 2, bitset_stream_size( map ) );
|
||||
|
||||
bitset_stream_dequeue( map, NULL ); // ON
|
||||
bitset_stream_dequeue( map, &result ); // OFF
|
||||
|
||||
ck_assert_int_eq( BITSET_STREAM_OFF, result.event );
|
||||
ck_assert_int_eq( 0, result.from );
|
||||
ck_assert_int_eq( 64, result.len );
|
||||
|
||||
bitset_free( map );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_bitset_stream_with_set_range)
|
||||
{
|
||||
struct bitset *map = bitset_alloc( 64, 1 );
|
||||
struct bitset_stream_entry result;
|
||||
memset( &result, 0, sizeof( result ) );
|
||||
|
||||
bitset_enable_stream( map );
|
||||
bitset_set_range( map, 0, 32 );
|
||||
|
||||
ck_assert_int_eq( 2, bitset_stream_size( map ) );
|
||||
|
||||
bitset_stream_dequeue( map, NULL ); // ON
|
||||
bitset_stream_dequeue( map, &result ); // SET
|
||||
|
||||
ck_assert_int_eq( BITSET_STREAM_SET, result.event );
|
||||
ck_assert_int_eq( 0, result.from );
|
||||
ck_assert_int_eq( 32, result.len );
|
||||
|
||||
bitset_free( map );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_bitset_stream_with_clear_range)
|
||||
{
|
||||
struct bitset *map = bitset_alloc( 64, 1 );
|
||||
struct bitset_stream_entry result;
|
||||
memset( &result, 0, sizeof( result ) );
|
||||
|
||||
bitset_enable_stream( map );
|
||||
bitset_clear_range( map, 0, 32 );
|
||||
ck_assert_int_eq( 2, bitset_stream_size( map ) );
|
||||
|
||||
bitset_stream_dequeue( map, NULL ); // ON
|
||||
bitset_stream_dequeue( map, &result ); // UNSET
|
||||
|
||||
ck_assert_int_eq( BITSET_STREAM_UNSET, result.event );
|
||||
ck_assert_int_eq( 0, result.from );
|
||||
ck_assert_int_eq( 32, result.len );
|
||||
|
||||
bitset_free( map );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_bitset_stream_size)
|
||||
{
|
||||
struct bitset *map = bitset_alloc( 64, 1 );
|
||||
bitset_enable_stream( map );
|
||||
bitset_set_range( map, 0, 32 );
|
||||
bitset_set_range( map, 16, 32 );
|
||||
bitset_set_range( map, 7, 16 );
|
||||
|
||||
bitset_clear_range( map, 0, 32 );
|
||||
bitset_clear_range( map, 16, 32 );
|
||||
bitset_clear_range( map, 48, 16 );
|
||||
bitset_disable_stream( map );
|
||||
|
||||
ck_assert_int_eq( 8, bitset_stream_size( map ) );
|
||||
|
||||
bitset_free( map );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_bitset_stream_queued_bytes)
|
||||
{
|
||||
struct bitset *map = bitset_alloc( 64, 1 );
|
||||
bitset_enable_stream( map );
|
||||
bitset_set_range( map, 0, 32 );
|
||||
bitset_set_range( map, 16, 32 );
|
||||
bitset_set_range( map, 7, 16 );
|
||||
|
||||
bitset_clear_range( map, 0, 32 );
|
||||
bitset_clear_range( map, 16, 32 );
|
||||
bitset_clear_range( map, 48, 16 );
|
||||
bitset_clear_range( map, 0, 2 );
|
||||
bitset_disable_stream( map );
|
||||
|
||||
ck_assert_int_eq( 64, bitset_stream_queued_bytes( map, BITSET_STREAM_ON ) );
|
||||
ck_assert_int_eq( 80, bitset_stream_queued_bytes( map, BITSET_STREAM_SET ) );
|
||||
ck_assert_int_eq( 82, bitset_stream_queued_bytes( map, BITSET_STREAM_UNSET ) );
|
||||
ck_assert_int_eq( 64, bitset_stream_queued_bytes( map, BITSET_STREAM_OFF ) );
|
||||
bitset_free( map );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
Suite* bitset_suite(void)
|
||||
{
|
||||
Suite *s = suite_create("bitset");
|
||||
|
||||
TCase *tc_bit = tcase_create("bit");
|
||||
TCase *tc_bitset = tcase_create("bitset");
|
||||
tcase_add_test(tc_bit, test_bit_set);
|
||||
tcase_add_test(tc_bit, test_bit_clear);
|
||||
tcase_add_test(tc_bit, test_bit_tests);
|
||||
tcase_add_test(tc_bit, test_bit_ranges);
|
||||
tcase_add_test(tc_bit, test_bit_runs);
|
||||
suite_add_tcase(s, tc_bit);
|
||||
|
||||
TCase *tc_bitset = tcase_create("bitset");
|
||||
tcase_add_test(tc_bitset, test_bitset);
|
||||
tcase_add_test(tc_bitset, test_bitset_set);
|
||||
tcase_add_test(tc_bitset, test_bitset_clear);
|
||||
suite_add_tcase(s, tc_bit);
|
||||
tcase_add_test(tc_bitset, test_bitset_run_count);
|
||||
tcase_add_test(tc_bitset, test_bitset_set_range);
|
||||
tcase_add_test(tc_bitset, test_bitset_clear_range);
|
||||
tcase_add_test(tc_bitset, test_bitset_set_range_doesnt_push_to_stream);
|
||||
tcase_add_test(tc_bitset, test_bitset_clear_range_doesnt_push_to_stream);
|
||||
suite_add_tcase(s, tc_bitset);
|
||||
|
||||
|
||||
TCase *tc_bitset_stream = tcase_create("bitset_stream");
|
||||
tcase_add_test(tc_bitset_stream, test_bitset_enable_stream);
|
||||
tcase_add_test(tc_bitset_stream, test_bitset_disable_stream);
|
||||
tcase_add_test(tc_bitset_stream, test_bitset_stream_with_set_range);
|
||||
tcase_add_test(tc_bitset_stream, test_bitset_stream_with_clear_range);
|
||||
tcase_add_test(tc_bitset_stream, test_bitset_stream_size);
|
||||
tcase_add_test(tc_bitset_stream, test_bitset_stream_queued_bytes);
|
||||
suite_add_tcase(s, tc_bitset_stream);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@@ -76,8 +76,8 @@ START_TEST( test_read_request_quits_on_stop_signal )
|
||||
|
||||
client_signal_stop( c );
|
||||
|
||||
int client_read_request( struct client *, struct nbd_request *);
|
||||
fail_unless( 0 == client_read_request( c, &nbdr ), "Didn't quit on stop." );
|
||||
int client_serve_request( struct client *);
|
||||
fail_unless( 1 == client_serve_request( c ), "Didn't quit on stop." );
|
||||
|
||||
close( fds[0] );
|
||||
close( fds[1] );
|
||||
|
@@ -7,15 +7,12 @@ START_TEST( test_listening_assigns_sock )
|
||||
{
|
||||
struct flexnbd * flexnbd = flexnbd_create_listening(
|
||||
"127.0.0.1",
|
||||
NULL,
|
||||
"4777",
|
||||
NULL,
|
||||
"fakefile",
|
||||
"fakesock",
|
||||
0,
|
||||
0,
|
||||
NULL,
|
||||
1 );
|
||||
NULL );
|
||||
fail_if( NULL == flexnbd->control->socket_name, "No socket was copied" );
|
||||
}
|
||||
END_TEST
|
||||
|
@@ -1,57 +0,0 @@
|
||||
#include "serve.h"
|
||||
#include "listen.h"
|
||||
#include "util.h"
|
||||
#include "flexnbd.h"
|
||||
|
||||
#include <check.h>
|
||||
#include <string.h>
|
||||
|
||||
START_TEST( test_defaults_main_serve_opts )
|
||||
{
|
||||
struct flexnbd flexnbd;
|
||||
struct listen * listen = listen_create( &flexnbd, "127.0.0.1", NULL, "4777", NULL,
|
||||
"foo", 0, 0, NULL, 1 );
|
||||
NULLCHECK( listen );
|
||||
struct server *init_serve = listen->init_serve;
|
||||
struct server *main_serve = listen->main_serve;
|
||||
NULLCHECK( init_serve );
|
||||
NULLCHECK( main_serve );
|
||||
|
||||
fail_unless( 0 == memcmp(&init_serve->bind_to,
|
||||
&main_serve->bind_to,
|
||||
sizeof( union mysockaddr )),
|
||||
"Main serve bind_to was not set" );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
|
||||
Suite* listen_suite(void)
|
||||
{
|
||||
Suite *s = suite_create("listen");
|
||||
TCase *tc_create = tcase_create("create");
|
||||
|
||||
tcase_add_exit_test(tc_create, test_defaults_main_serve_opts, 0);
|
||||
|
||||
suite_add_tcase(s, tc_create);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
# define LOG_LEVEL 0
|
||||
#else
|
||||
# define LOG_LEVEL 2
|
||||
#endif
|
||||
|
||||
int main(void)
|
||||
{
|
||||
log_level = LOG_LEVEL;
|
||||
int number_failed;
|
||||
Suite *s = listen_suite();
|
||||
SRunner *sr = srunner_create(s);
|
||||
srunner_run_all(sr, CK_NORMAL);
|
||||
number_failed = srunner_ntests_failed(sr);
|
||||
srunner_free(sr);
|
||||
return (number_failed == 0) ? 0 : 1;
|
||||
}
|
||||
|
@@ -66,9 +66,9 @@ START_TEST( test_receive_blocks_until_post )
|
||||
END_TEST
|
||||
|
||||
|
||||
Suite* acl_suite(void)
|
||||
Suite* mbox_suite(void)
|
||||
{
|
||||
Suite *s = suite_create("acl");
|
||||
Suite *s = suite_create("mbox");
|
||||
TCase *tc_create = tcase_create("create");
|
||||
TCase *tc_post = tcase_create("post");
|
||||
|
||||
@@ -93,7 +93,7 @@ int main(void)
|
||||
log_level = 2;
|
||||
#endif
|
||||
int number_failed;
|
||||
Suite *s = acl_suite();
|
||||
Suite *s = mbox_suite();
|
||||
SRunner *sr = srunner_create(s);
|
||||
srunner_run_all(sr, CK_NORMAL);
|
||||
log_level = 0;
|
||||
|
@@ -88,14 +88,14 @@ START_TEST(test_request_handle)
|
||||
struct nbd_request_raw request_raw;
|
||||
struct nbd_request request;
|
||||
|
||||
memcpy( request_raw.handle, "MYHANDLE", 8 );
|
||||
memcpy( request_raw.handle.b, "MYHANDLE", 8 );
|
||||
|
||||
nbd_r2h_request( &request_raw, &request );
|
||||
memset( request_raw.handle, 0, 8 );
|
||||
request_raw.handle.w = 0;
|
||||
nbd_h2r_request( &request, &request_raw );
|
||||
|
||||
fail_unless( memcmp( request.handle, "MYHANDLE", 8 ) == 0, "The handle was not copied." );
|
||||
fail_unless( memcmp( request_raw.handle, "MYHANDLE", 8 ) == 0, "The handle was not copied back." );
|
||||
fail_unless( memcmp( request.handle.b, "MYHANDLE", 8 ) == 0, "The handle was not copied." );
|
||||
fail_unless( memcmp( request_raw.handle.b, "MYHANDLE", 8 ) == 0, "The handle was not copied back." );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
@@ -170,14 +170,14 @@ START_TEST(test_reply_handle)
|
||||
struct nbd_reply_raw reply_raw;
|
||||
struct nbd_reply reply;
|
||||
|
||||
memcpy( reply_raw.handle, "MYHANDLE", 8 );
|
||||
memcpy( reply_raw.handle.b, "MYHANDLE", 8 );
|
||||
|
||||
nbd_r2h_reply( &reply_raw, &reply );
|
||||
memset( reply_raw.handle, 0, 8 );
|
||||
reply_raw.handle.w = 0;
|
||||
nbd_h2r_reply( &reply, &reply_raw );
|
||||
|
||||
fail_unless( memcmp( reply.handle, "MYHANDLE", 8 ) == 0, "The handle was not copied." );
|
||||
fail_unless( memcmp( reply_raw.handle, "MYHANDLE", 8 ) == 0, "The handle was not copied back." );
|
||||
fail_unless( memcmp( reply.handle.b, "MYHANDLE", 8 ) == 0, "The handle was not copied." );
|
||||
fail_unless( memcmp( reply_raw.handle.b, "MYHANDLE", 8 ) == 0, "The handle was not copied back." );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
@@ -188,14 +188,15 @@ START_TEST( test_convert_from )
|
||||
* nbd_request_raw */
|
||||
struct nbd_request_raw request_raw;
|
||||
struct nbd_request request;
|
||||
char readbuf[] = {0x80, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
memcpy( &request_raw.from, readbuf, 8 );
|
||||
uint64_t target = 0x8000000000000000;
|
||||
|
||||
/* this is stored big-endian */
|
||||
request_raw.from = htobe64(target);
|
||||
|
||||
/* We expect this to convert big-endian to the host format */
|
||||
nbd_r2h_request( &request_raw, &request );
|
||||
|
||||
uint64_t target = 1;
|
||||
target <<= 63;
|
||||
fail_unless( target == request.from, "from was wrong" );
|
||||
}
|
||||
END_TEST
|
||||
|
@@ -9,6 +9,9 @@
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
|
||||
@@ -19,7 +22,7 @@
|
||||
|
||||
|
||||
int fd_read_request( int, struct nbd_request_raw *);
|
||||
int fd_write_reply( int, char *, int );
|
||||
int fd_write_reply( int, uint64_t, int );
|
||||
|
||||
int marker;
|
||||
|
||||
@@ -43,8 +46,7 @@ void * responder( void *respond_uncast )
|
||||
struct respond * resp = (struct respond *) respond_uncast;
|
||||
int sock_fd = resp->sock_fds[1];
|
||||
struct nbd_request_raw request_raw;
|
||||
char wrong_handle[] = "WHOOPSIE";
|
||||
|
||||
uint64_t wrong_handle = 0x80;
|
||||
|
||||
if( fd_read_request( sock_fd, &request_raw ) == -1){
|
||||
fprintf(stderr, "Problem with fd_read_request\n");
|
||||
@@ -54,8 +56,9 @@ void * responder( void *respond_uncast )
|
||||
fd_write_reply( sock_fd, wrong_handle, 0 );
|
||||
}
|
||||
else {
|
||||
fd_write_reply( sock_fd, resp->received.handle, 0 );
|
||||
fd_write_reply( sock_fd, resp->received.handle.w, 0 );
|
||||
}
|
||||
write( sock_fd, "12345678", 8 );
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@@ -85,14 +88,16 @@ void respond_destroy( struct respond * respond ){
|
||||
}
|
||||
|
||||
|
||||
void * entruster( void * nothing __attribute__((unused)))
|
||||
void * reader( void * nothing __attribute__((unused)))
|
||||
{
|
||||
DECLARE_ERROR_CONTEXT( error_context );
|
||||
error_set_handler( (cleanup_handler *)error_marker, error_context );
|
||||
|
||||
struct respond * respond = respond_create( 1 );
|
||||
int devnull = open("/dev/null", O_WRONLY);
|
||||
char outbuf[8] = {0};
|
||||
|
||||
socket_nbd_entrust( respond->sock_fds[0] );
|
||||
socket_nbd_read( respond->sock_fds[0], 0, 8, devnull, outbuf, 1 );
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@@ -101,13 +106,14 @@ START_TEST( test_rejects_mismatched_handle )
|
||||
{
|
||||
|
||||
error_init();
|
||||
pthread_t entruster_thread;
|
||||
pthread_t reader_thread;
|
||||
|
||||
log_level=5;
|
||||
|
||||
marker = 0;
|
||||
pthread_create( &entruster_thread, NULL, entruster, NULL );
|
||||
FATAL_UNLESS( 0 == pthread_join( entruster_thread, NULL ), "pthread_join failed");
|
||||
pthread_create( &reader_thread, NULL, reader, NULL );
|
||||
FATAL_UNLESS( 0 == pthread_join( reader_thread, NULL ),
|
||||
"pthread_join failed");
|
||||
|
||||
log_level=2;
|
||||
|
||||
@@ -120,19 +126,10 @@ START_TEST( test_accepts_matched_handle )
|
||||
{
|
||||
struct respond * respond = respond_create( 0 );
|
||||
|
||||
socket_nbd_entrust( respond->sock_fds[0] );
|
||||
int devnull = open("/dev/null", O_WRONLY);
|
||||
char outbuf[8] = {0};
|
||||
|
||||
respond_destroy( respond );
|
||||
}
|
||||
END_TEST
|
||||
|
||||
|
||||
START_TEST( test_entrust_type_sent )
|
||||
{
|
||||
struct respond * respond = respond_create( 0 );
|
||||
|
||||
socket_nbd_entrust( respond->sock_fds[0] );
|
||||
fail_unless( respond->received.type == REQUEST_ENTRUST, "Wrong type sent." );
|
||||
socket_nbd_read( respond->sock_fds[0], 0, 8, devnull, outbuf, 1 );
|
||||
|
||||
respond_destroy( respond );
|
||||
}
|
||||
@@ -152,14 +149,13 @@ END_TEST
|
||||
|
||||
Suite* readwrite_suite(void)
|
||||
{
|
||||
Suite *s = suite_create("acl");
|
||||
Suite *s = suite_create("readwrite");
|
||||
TCase *tc_transfer = tcase_create("entrust");
|
||||
TCase *tc_disconnect = tcase_create("disconnect");
|
||||
|
||||
|
||||
tcase_add_test(tc_transfer, test_rejects_mismatched_handle);
|
||||
tcase_add_exit_test(tc_transfer, test_accepts_matched_handle, 0);
|
||||
tcase_add_test( tc_transfer, test_entrust_type_sent );
|
||||
|
||||
/* This test is a little funny. We respond with a dodgy handle
|
||||
* and check that this *doesn't* cause a message rejection,
|
||||
|
@@ -64,7 +64,7 @@ START_TEST( test_replaces_acl )
|
||||
{
|
||||
struct flexnbd flexnbd;
|
||||
flexnbd.signal_fd = -1;
|
||||
struct server * s = server_create( &flexnbd, "127.0.0.1", "0", dummy_file, 0, 0, NULL, 1, 1 );
|
||||
struct server * s = server_create( &flexnbd, "127.0.0.1", "0", dummy_file, 0, 0, NULL, 1, 0, 1 );
|
||||
struct acl * new_acl = acl_create( 0, NULL, 0 );
|
||||
|
||||
server_replace_acl( s, new_acl );
|
||||
@@ -79,7 +79,7 @@ START_TEST( test_signals_acl_updated )
|
||||
{
|
||||
struct flexnbd flexnbd;
|
||||
flexnbd.signal_fd = -1;
|
||||
struct server * s = server_create( &flexnbd, "127.0.0.1", "0", dummy_file, 0, 0, NULL, 1, 1 );
|
||||
struct server * s = server_create( &flexnbd, "127.0.0.1", "0", dummy_file, 0, 0, NULL, 1, 0, 1 );
|
||||
struct acl * new_acl = acl_create( 0, NULL, 0 );
|
||||
|
||||
server_replace_acl( s, new_acl );
|
||||
@@ -93,7 +93,7 @@ END_TEST
|
||||
|
||||
int connect_client( char *addr, int actual_port, char *source_addr )
|
||||
{
|
||||
int client_fd;
|
||||
int client_fd = -1;
|
||||
|
||||
struct addrinfo hint;
|
||||
struct addrinfo *ailist, *aip;
|
||||
@@ -148,7 +148,7 @@ START_TEST( test_acl_update_closes_bad_client )
|
||||
*/
|
||||
struct flexnbd flexnbd;
|
||||
flexnbd.signal_fd = -1;
|
||||
struct server * s = server_create( &flexnbd, "127.0.0.7", "0", dummy_file, 0, 0, NULL, 1, 1 );
|
||||
struct server * s = server_create( &flexnbd, "127.0.0.7", "0", dummy_file, 0, 0, NULL, 1, 0, 1 );
|
||||
struct acl * new_acl = acl_create( 0, NULL, 1 );
|
||||
struct client * c;
|
||||
struct client_tbl_entry * entry;
|
||||
@@ -193,7 +193,7 @@ START_TEST( test_acl_update_leaves_good_client )
|
||||
struct flexnbd flexnbd;
|
||||
flexnbd.signal_fd = -1;
|
||||
|
||||
struct server * s = server_create( &flexnbd, "127.0.0.7", "0", dummy_file, 0, 0, NULL, 1, 1 );
|
||||
struct server * s = server_create( &flexnbd, "127.0.0.7", "0", dummy_file, 0, 0, NULL, 1, 0, 1 );
|
||||
|
||||
char *lines[] = {"127.0.0.1"};
|
||||
struct acl * new_acl = acl_create( 1, lines, 1 );
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user