quark

quark web server
git clone git://git.suckless.org/quark
Log | Files | Refs | LICENSE

commit 7814309e9a2c386f646892403e53f2a87b929b0c
parent e2463e733e4880c1d8034e3b90072825509ceb69
Author: Laslo Hunhold <dev@frign.de>
Date:   Mon, 22 Feb 2021 18:39:49 +0100

Use one listening socket for all threads

Previously, we employed SO_REUSEPORT and bound as many listening
sockets as we had threads. Inside the kernel, this creates separate
queues for each socket and a hash-based-round-robin distributes incoming
requests evenly among the listening sockets.

As a result, the load was well-balanced among threads, in contrast to
sharing one listening socket (and one shared queue), because when
epoll() registers new incoming connections, it takes the last-activated
thread (FIFO). As a result, only one thread usually gets most of the
load, unless the server is really stressed.

So why change it back? The reason is latency. It's not a coincidence
that when you're at the supermarket you very often see another queue
pass along much faster than you. Even though congestion (i.e. a slow
cashier or customer) is completely random and no cashier-post is
favoured, all people in the congested queue are affected (with 3
queues, the chance of being in one of the slower ones is 2/3).
A much more efficient approach is to have one shared queue and 3
cashiers. Even when there's congestion at one cashier-post, the others
will continue processing customers and the overall latency is much
more consistent.
The same applies to the connection-queue: If a thread is really busy
with a request, the waiting connections will be affected without the
possibility of being processed by another idle thread.
Another reason is efficiency: The hash-based-round-robin in the kernel
adds a small overhead that can be avoided this way. The change also
removes complexity in the code and undoes the inconsistent "hack" where
we just gave each thread the same listening socket when we were working
with a UNIX-domain socket.

This change adds a small "regression": When quark is hammered with a
lot of connections (>15k/s), it is more likely to drop connections
because epoll has the aforementioned FIFO-behaviour (i.e. it tends to
pass the connection-events to the same thread), and that thread usually
accepts new connections before processing read/write-related events.
This in turn exhausts the thread's connection-pool at some point, but
because quark handles that case well, we never enter a DoS-state.

This commit is a preparation for upcoming changes.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
Mmain.c | 36+++++++++++++++++++++++-------------
Mserver.c | 4++--
Mserver.h | 2+-
Msock.c | 122+++++++++++++++++++++++++++----------------------------------------------------
Msock.h | 4++--
5 files changed, 69 insertions(+), 99 deletions(-)

diff --git a/main.c b/main.c @@ -72,7 +72,7 @@ main(int argc, char *argv[]) .docindex = "index.html", }; size_t i; - int *insock = NULL, status = 0; + int insock, status = 0; const char *err; char *tok[4]; @@ -214,21 +214,31 @@ main(int argc, char *argv[]) } } - /* create a nonblocking listening socket for each thread */ - if (!(insock = reallocarray(insock, nthreads, sizeof(*insock)))) { - die("reallocarray:"); - } - if (udsname ? sock_get_uds_arr(udsname, pwd->pw_uid, grp->gr_gid, - insock, nthreads) : - sock_get_ips_arr(srv.host, srv.port, insock, nthreads)) { + /* + * create the (non-blocking) listening socket + * + * we could use SO_REUSEPORT and create a listening socket for + * each thread (for better load-balancing, given each thread + * would get his own kernel-queue), but this increases latency + * (as a thread might get stuck on a larger request, making all + * other request wait in line behind it). + * + * socket contention with a single listening socket is a + * non-issue and thread-load-balancing is better fixed in the + * kernel by changing epoll-sheduling from a FIFO- to a + * LIFO-model, especially as it doesn't affect performance + */ + insock = udsname ? 
sock_get_uds(udsname, pwd->pw_uid, grp->gr_gid) : + sock_get_ips(srv.host, srv.port); + if (sock_set_nonblocking(insock)) { return 1; } - for (i = 0; i < nthreads; i++) { - if (sock_set_nonblocking(insock[i])) { - return 1; - } - } + /* + * before dropping privileges, we fork, as we need to remove + * the UNIX-domain socket when we shut down, which we need + * privileges for + */ switch (fork()) { case -1: warn("fork:"); diff --git a/server.c b/server.c @@ -134,7 +134,7 @@ server_worker(void *data) } void -server_init_thread_pool(int *insock, size_t nthreads, size_t nslots, +server_init_thread_pool(int insock, size_t nthreads, size_t nslots, const struct server *srv) { pthread_t *thread = NULL; @@ -146,7 +146,7 @@ server_init_thread_pool(int *insock, size_t nthreads, size_t nslots, die("reallocarray:"); } for (i = 0; i < nthreads; i++) { - d[i].insock = insock[i]; + d[i].insock = insock; d[i].nslots = nslots; d[i].srv = srv; } diff --git a/server.h b/server.h @@ -30,6 +30,6 @@ struct server { size_t map_len; }; -void server_init_thread_pool(int *, size_t, size_t, const struct server *); +void server_init_thread_pool(int, size_t, size_t, const struct server *); #endif /* SERVER_H */ diff --git a/sock.c b/sock.c @@ -18,8 +18,7 @@ #include "util.h" int -sock_get_ips_arr(const char *host, const char* port, int *sockfd, - size_t sockfdlen) +sock_get_ips(const char *host, const char* port) { struct addrinfo hints = { .ai_flags = AI_NUMERICSERV, @@ -27,135 +26,96 @@ sock_get_ips_arr(const char *host, const char* port, int *sockfd, .ai_socktype = SOCK_STREAM, }; struct addrinfo *ai, *p; - int r; - size_t i, j; + int ret, insock = 0; - if ((r = getaddrinfo(host, port, &hints, &ai))) { - warn("getaddrinfo: %s", gai_strerror(r)); - return 1; + if ((ret = getaddrinfo(host, port, &hints, &ai))) { + die("getaddrinfo: %s", gai_strerror(ret)); } for (p = ai; p; p = p->ai_next) { - /* try generating sockfds */ - for (i = 0; i < sockfdlen; i++) { - if ((sockfd[i] = 
socket(p->ai_family, p->ai_socktype, - p->ai_protocol)) < 0) { - /* retry with the next addrinfo */ - break; - } - - /* - * set SO_REUSEPORT, so it becomes possible to bind - * to the same port with multiple sockets, which - * is what we're doing here - */ - if (setsockopt(sockfd[i], SOL_SOCKET, SO_REUSEPORT, - &(int){1}, sizeof(int)) < 0) { - warn("setsockopt:"); - return 1; - } - - if (bind(sockfd[i], p->ai_addr, p->ai_addrlen) < 0) { - /* bind failed, close all previous fd's and retry */ - for (j = 0; j <= i; j++) { - if (close(sockfd[i]) < 0) { - warn("close:"); - return 1; - } - } - break; - } + if ((insock = socket(p->ai_family, p->ai_socktype, + p->ai_protocol)) < 0) { + continue; + } + if (setsockopt(insock, SOL_SOCKET, SO_REUSEADDR, + &(int){1}, sizeof(int)) < 0) { + die("setsockopt:"); } - if (i == sockfdlen) { - /* we have generated all requested fds */ - break; + if (bind(insock, p->ai_addr, p->ai_addrlen) < 0) { + /* bind failed, close the insock and retry */ + if (close(insock) < 0) { + die("close:"); + } + continue; } + break; } freeaddrinfo(ai); if (!p) { /* we exhaustet the addrinfo-list and found no connection */ if (errno == EACCES) { - warn("You need to run as root or have " - "CAP_NET_BIND_SERVICE set to bind to " - "privileged ports"); + die("You need to run as root or have " + "CAP_NET_BIND_SERVICE set to bind to " + "privileged ports"); } else { - warn("bind:"); + die("bind:"); } - return 1; } - for (i = 0; i < sockfdlen; i++) { - if (listen(sockfd[i], SOMAXCONN) < 0) { - warn("listen:"); - return 1; - } + if (listen(insock, SOMAXCONN) < 0) { + die("listen:"); } - return 0; -} - -void -sock_rem_uds(const char *udsname) -{ - if (unlink(udsname) < 0) { - die("unlink '%s':", udsname); - } + return insock; } int -sock_get_uds_arr(const char *udsname, uid_t uid, gid_t gid, int *sockfd, - size_t sockfdlen) +sock_get_uds(const char *udsname, uid_t uid, gid_t gid) { struct sockaddr_un addr = { .sun_family = AF_UNIX, }; - size_t udsnamelen, i; + 
size_t udsnamelen; int insock, sockmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; if ((insock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { - warn("socket:"); - return 1; + die("socket:"); } if ((udsnamelen = strlen(udsname)) > sizeof(addr.sun_path) - 1) { - warn("UNIX-domain socket name truncated"); - return 1; + die("UNIX-domain socket name truncated"); } memcpy(addr.sun_path, udsname, udsnamelen + 1); if (bind(insock, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { - warn("bind '%s':", udsname); - return 1; + die("bind '%s':", udsname); } if (listen(insock, SOMAXCONN) < 0) { sock_rem_uds(udsname); - warn("listen:"); - return 1; + die("listen:"); } if (chmod(udsname, sockmode) < 0) { sock_rem_uds(udsname); - warn("chmod '%s':", udsname); - return 1; + die("chmod '%s':", udsname); } if (chown(udsname, uid, gid) < 0) { sock_rem_uds(udsname); - warn("chown '%s':", udsname); - return 1; + die("chown '%s':", udsname); } - for (i = 0; i < sockfdlen; i++) { - /* - * we can't bind to an AF_UNIX socket more than once, - * so we just reuse the same fd on all threads. - */ - sockfd[i] = insock; - } + return insock; +} - return 0; +void +sock_rem_uds(const char *udsname) +{ + if (unlink(udsname) < 0) { + die("unlink '%s':", udsname); + } } int diff --git a/sock.h b/sock.h @@ -6,9 +6,9 @@ #include <sys/socket.h> #include <sys/types.h> -int sock_get_ips_arr(const char *, const char *, int *, size_t); +int sock_get_ips(const char *, const char *); +int sock_get_uds(const char *, uid_t, gid_t); void sock_rem_uds(const char *); -int sock_get_uds_arr(const char *, uid_t, gid_t, int *, size_t); int sock_set_timeout(int, int); int sock_set_nonblocking(int); int sock_get_inaddr_str(const struct sockaddr_storage *, char *, size_t);