quark

quark web server
git clone git://git.suckless.org/quark
Log | Files | Refs | LICENSE

commit 319ba7083fdde836d6614c6b8b228bf3a9849e95
parent c6a9055e5a30be570e30da8d216c39662c3a3f99
Author: Laslo Hunhold <dev@frign.de>
Date:   Sat, 30 Jan 2021 12:53:00 +0100

Ignore queries and fragments in URIs

Previously, a request for "/index.html" would yield a 200, while a
request for "/index.html?foo=bar" would yield a 404, as quark would
look for the file "index.html?foo=bar" in the serve directory.

To accommodate this behaviour, it's no longer sufficient to just compare
realuri and req->uri. Instead, we set a "dirty" flag every time we
change the URI in such a way that it requires a redirect.

According to RFC 3986 section 3, queries and fragments are there
to (further) "identify a resource within the scope of the URI's scheme
and naming authority (if any)". However, it's perfectly legitimate to
just ignore this further specification when the URI itself is already
pointing at a unique resource (i.e. "/index.html").

This behaviour is consistent with dynamic web applications which usually
ignore parameters they don't care about. Quark is too much Zen to care
about any parameters. This has the added bonus that you can now clone
repositories (read-only) via the "dumb" HTTP git-protocol, so

	git clone https://example.org/git/project.git

is now possible (provided you run update-server-info during the
post-update-hook). This wouldn't work previously because git, when
asked to clone via HTTP, would first probe the server with a request for

	project.git/info/refs?service=git-upload-pack

(i.e. asking for the "smart" HTTP git-protocol to confirm). Quark would
return a 404, though, while git only gracefully "downgrades" to the
"dumb" HTTP git-protocol if the request succeeds but only yields a basic
200 response without special git-headers.

This way, it is now trivial to also share git-repositories (and other
gracefully-downgrading protocols). While the "dumb" HTTP git-protocol
only supports read-only-access, I don't think that's much of an overall
loss (to the contrary!).

HTTP authentication is broken and it makes much more sense to give
contributors ssh-access and have them push changes via ssh. The key
advantage of HTTP-cloning over git://-cloning is the fact that the git
protocol can be tampered with, while the HTTP-protocol can be encapsulated
in a secure TLS connection.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M http.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++------------------
1 file changed, 51 insertions(+), 18 deletions(-)

diff --git a/http.c b/http.c @@ -368,12 +368,12 @@ static int normabspath(char *path) { size_t len; - int last = 0; + int dirty = 0, last = 0; char *p, *q; /* require and skip first slash */ if (path[0] != '/') { - return 1; + return -1; } p = path + 1; @@ -387,7 +387,9 @@ normabspath(char *path) last = 1; } - if (p == q || (q - p == 1 && p[0] == '.')) { + if (*p == '\0') { + break; + } else if (p == q || (q - p == 1 && p[0] == '.')) { /* "/" or "./" */ goto squash; } else if (q - p == 2 && p[0] == '.' && p[1] == '.') { @@ -412,9 +414,10 @@ squash: memmove(p, q + 1, len - ((q + 1) - path) + 2); len -= (q + 1) - p; } + dirty = 1; } - return 0; + return dirty; } static enum status @@ -562,7 +565,7 @@ http_prepare_response(const struct request *req, struct response *res, struct tm tm = { 0 }; struct vhost *vhost; size_t len, i; - int hasport, ipv6host; + int dirty = 0, hasport, ipv6host; static char realuri[PATH_MAX], tmpuri[PATH_MAX]; char *p, *mime; const char *targethost; @@ -570,11 +573,29 @@ http_prepare_response(const struct request *req, struct response *res, /* empty all response fields */ memset(res, 0, sizeof(*res)); - /* make a working copy of the URI and normalize it */ + /* + * make a working copy of the URI, strip queries and fragments + * (ignorable according to RFC 3986 section 3) and normalize it + */ memcpy(realuri, req->uri, sizeof(realuri)); - if (normabspath(realuri)) { + + if ((p = strchr(realuri, '?'))) { + *p = '\0'; + } else if ((p = strchr(realuri, '#'))) { + *p = '\0'; + } + + switch (normabspath(realuri)) { + case -1: s = S_BAD_REQUEST; goto err; + case 0: + /* string is unchanged */ + break; + case 1: + /* string was changed */ + dirty = 1; + break; } /* match vhost */ @@ -594,10 +615,12 @@ http_prepare_response(const struct request *req, struct response *res, } /* if we have a vhost prefix, prepend it to the URI */ - if (vhost->prefix && - prepend(realuri, LEN(realuri), vhost->prefix)) { - s = S_REQUEST_TOO_LARGE; - goto err; + if 
(vhost->prefix) { + if (prepend(realuri, LEN(realuri), vhost->prefix)) { + s = S_REQUEST_TOO_LARGE; + goto err; + } + dirty = 1; } } @@ -618,14 +641,23 @@ http_prepare_response(const struct request *req, struct response *res, s = S_REQUEST_TOO_LARGE; goto err; } + dirty = 1; break; } } /* normalize URI again, in case we introduced dirt */ - if (normabspath(realuri)) { + switch (normabspath(realuri)) { + case -1: s = S_BAD_REQUEST; goto err; + case 0: + /* string is unchanged */ + break; + case 1: + /* string was changed */ + dirty = 1; + break; } /* stat the relative path derived from the URI */ @@ -644,6 +676,7 @@ http_prepare_response(const struct request *req, struct response *res, if (len > 0 && realuri[len - 1] != '/') { realuri[len] = '/'; realuri[len + 1] = '\0'; + dirty = 1; } } @@ -658,10 +691,10 @@ http_prepare_response(const struct request *req, struct response *res, } /* - * redirect if the original URI and the "real" URI differ or if - * the requested host is non-canonical + * redirect if the URI needs to be redirected or the requested + * host is non-canonical */ - if (strcmp(req->uri, realuri) || (srv->vhost && vhost && + if (dirty || (srv->vhost && vhost && strcmp(req->field[REQ_HOST], vhost->chost))) { res->status = S_MOVED_PERMANENTLY; @@ -716,12 +749,12 @@ http_prepare_response(const struct request *req, struct response *res, * (optionally including the vhost servedir as a prefix) * into the actual response-path */ - if (esnprintf(res->uri, sizeof(res->uri), "%s", req->uri)) { + if (esnprintf(res->uri, sizeof(res->uri), "%s", realuri)) { s = S_REQUEST_TOO_LARGE; goto err; } if (esnprintf(res->path, sizeof(res->path), "%s%s", - vhost ? vhost->dir : "", RELPATH(req->uri))) { + vhost ? 
vhost->dir : "", RELPATH(realuri))) { s = S_REQUEST_TOO_LARGE; goto err; } @@ -733,7 +766,7 @@ http_prepare_response(const struct request *req, struct response *res, * the URI */ if (esnprintf(tmpuri, sizeof(tmpuri), "%s%s", - req->uri, srv->docindex)) { + realuri, srv->docindex)) { s = S_REQUEST_TOO_LARGE; goto err; }