sites

public wiki contents of suckless.org
git clone git://git.suckless.org/sites
Log | Files | Refs

dwm-winicon-6.2-v1.diff (128154B)


      1 diff --git a/config.def.h b/config.def.h
      2 index 1c0b587..5385e27 100644
      3 --- a/config.def.h
      4 +++ b/config.def.h
      5 @@ -5,6 +5,8 @@ static const unsigned int borderpx  = 1;        /* border pixel of windows */
      6  static const unsigned int snap      = 32;       /* snap pixel */
      7  static const int showbar            = 1;        /* 0 means no bar */
      8  static const int topbar             = 1;        /* 0 means bottom bar */
      9 +#define ICONSIZE 20   /* icon size */
     10 +#define ICONSPACING 5 /* space between icon and title */
     11  static const char *fonts[]          = { "monospace:size=10" };
     12  static const char dmenufont[]       = "monospace:size=10";
     13  static const char col_gray1[]       = "#222222";
     14 diff --git a/config.mk b/config.mk
     15 index 6d36cb7..39edf93 100644
     16 --- a/config.mk
     17 +++ b/config.mk
     18 @@ -22,7 +22,7 @@ FREETYPEINC = /usr/include/freetype2
     19  
     20  # includes and libs
     21  INCS = -I${X11INC} -I${FREETYPEINC}
     22 -LIBS = -L${X11LIB} -lX11 ${XINERAMALIBS} ${FREETYPELIBS}
     23 +LIBS = -L${X11LIB} -lX11 ${XINERAMALIBS} ${FREETYPELIBS} -lm
     24  
     25  # flags
     26  CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_POSIX_C_SOURCE=2 -DVERSION=\"${VERSION}\" ${XINERAMAFLAGS}
     27 diff --git a/drw.c b/drw.c
     28 index 8fd1ca4..304e269 100644
     29 --- a/drw.c
     30 +++ b/drw.c
     31 @@ -378,6 +378,29 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
     32  	return x + (render ? w : 0);
     33  }
     34  
     35 +static unsigned char /* mix x and y by weight a: a == 0 yields x, a == 255 yields y */
     36 +blend(unsigned char a, unsigned char x, unsigned char y) { return ((255-a)*x + a*y) / 255; }
     37 +
     38 +void /* draw img at (x, y), alpha-blended over the scheme's ColBg pixel; does nothing without drw or a scheme */
     39 +drw_img(Drw *drw, int x, int y, XImage *img, unsigned char *tmp) /* tmp: scratch buffer of at least width*height*4 bytes */
     40 +{
     41 +	if (!drw || !drw->scheme)
     42 +		return;
     43 +	int icsz = img->width * img->height, bt = drw->scheme[ColBg].pixel, i; /* icsz: pixel count; bt: background pixel value */
     44 +	unsigned char *data = (unsigned char *)img->data;
     45 +	unsigned char r = (bt & 0x000000ff), g = (bt & 0x0000ff00)>>8, b = (bt & 0x00ff0000)>>16; /* low, middle, high byte of bt; paired with image bytes 0, 1, 2 below */
     46 +	memcpy(tmp, data, icsz << 2); /* back up the untouched pixels (4 bytes each) */
     47 +	for (i = 0; i < icsz; ++i) {
     48 +		unsigned char a = data[(i<<2)|3]; /* byte 3 of every pixel is used as its alpha */
     49 +		data[(i<<2)  ] = blend(a, r, data[(i<<2)  ]);
     50 +		data[(i<<2)|1] = blend(a, g, data[(i<<2)|1]);
     51 +		data[(i<<2)|2] = blend(a, b, data[(i<<2)|2]);
     52 +	}
     53 +	XPutImage(drw->dpy, drw->drawable, drw->gc, img, 0, 0, x, y, img->width, img->height);
     54 +	/* undo the in-place blend so img->data is left exactly as it was passed in */
     55 +	memcpy(data, tmp, icsz << 2);
     56 +}
     57 +
     58  void
     59  drw_map(Drw *drw, Window win, int x, int y, unsigned int w, unsigned int h)
     60  {
     61 diff --git a/drw.h b/drw.h
     62 index 4bcd5ad..5346bec 100644
     63 --- a/drw.h
     64 +++ b/drw.h
     65 @@ -52,6 +52,7 @@ void drw_setscheme(Drw *drw, Clr *scm);
     66  /* Drawing functions */
     67  void drw_rect(Drw *drw, int x, int y, unsigned int w, unsigned int h, int filled, int invert);
     68  int drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lpad, const char *text, int invert);
     69 +void drw_img(Drw *drw, int x, int y, XImage *img, unsigned char *tmp);
     70  
     71  /* Map functions */
     72  void drw_map(Drw *drw, Window win, int x, int y, unsigned int w, unsigned int h);
     73 diff --git a/dwm.c b/dwm.c
     74 index 4465af1..c98796c 100644
     75 --- a/dwm.c
     76 +++ b/dwm.c
     77 @@ -28,6 +28,8 @@
     78  #include <stdlib.h>
     79  #include <string.h>
     80  #include <unistd.h>
     81 +#include <limits.h>
     82 +#include <stdint.h>
     83  #include <sys/types.h>
     84  #include <sys/wait.h>
     85  #include <X11/cursorfont.h>
     86 @@ -60,7 +62,7 @@
     87  /* enums */
     88  enum { CurNormal, CurResize, CurMove, CurLast }; /* cursor */
     89  enum { SchemeNorm, SchemeSel }; /* color schemes */
     90 -enum { NetSupported, NetWMName, NetWMState, NetWMCheck,
     91 +enum { NetSupported, NetWMName, NetWMIcon, NetWMState, NetWMCheck,
     92         NetWMFullscreen, NetActiveWindow, NetWMWindowType,
     93         NetWMWindowTypeDialog, NetClientList, NetLast }; /* EWMH atoms */
     94  enum { WMProtocols, WMDelete, WMState, WMTakeFocus, WMLast }; /* default atoms */
     95 @@ -93,6 +95,7 @@ struct Client {
     96  	int bw, oldbw;
     97  	unsigned int tags;
     98  	int isfixed, isfloating, isurgent, neverfocus, oldstate, isfullscreen;
     99 +	XImage *icon;
    100  	Client *next;
    101  	Client *snext;
    102  	Monitor *mon;
    103 @@ -171,6 +174,7 @@ static void focusmon(const Arg *arg);
    104  static void focusstack(const Arg *arg);
    105  static int getrootptr(int *x, int *y);
    106  static long getstate(Window w);
    107 +static XImage *geticonprop(Window win);
    108  static int gettextprop(Window w, Atom atom, char *text, unsigned int size);
    109  static void grabbuttons(Client *c, int focused);
    110  static void grabkeys(void);
    111 @@ -213,6 +217,7 @@ static void togglebar(const Arg *arg);
    112  static void togglefloating(const Arg *arg);
    113  static void toggletag(const Arg *arg);
    114  static void toggleview(const Arg *arg);
    115 +static void freeicon(Client *c);
    116  static void unfocus(Client *c, int setfocus);
    117  static void unmanage(Client *c, int destroyed);
    118  static void unmapnotify(XEvent *e);
    119 @@ -224,6 +229,7 @@ static void updatenumlockmask(void);
    120  static void updatesizehints(Client *c);
    121  static void updatestatus(void);
    122  static void updatetitle(Client *c);
    123 +static void updateicon(Client *c);
    124  static void updatewindowtype(Client *c);
    125  static void updatewmhints(Client *c);
    126  static void view(const Arg *arg);
    127 @@ -731,7 +737,9 @@ drawbar(Monitor *m)
    128  	if ((w = m->ww - sw - x) > bh) {
    129  		if (m->sel) {
    130  			drw_setscheme(drw, scheme[m == selmon ? SchemeSel : SchemeNorm]);
    131 -			drw_text(drw, x, 0, w, bh, lrpad / 2, m->sel->name, 0);
    132 +			drw_text(drw, x, 0, w, bh, lrpad / 2 + (m->sel->icon ? m->sel->icon->width + ICONSPACING : 0), m->sel->name, 0);
    133 +			static unsigned char tmp[ICONSIZE * ICONSIZE << 2];
    134 +			if (m->sel->icon) drw_img(drw, x + lrpad / 2, (bh - m->sel->icon->height) / 2, m->sel->icon, tmp);
    135  			if (m->sel->isfloating)
    136  				drw_rect(drw, x + boxs, boxs, boxw, boxw, m->sel->isfixed, 0);
    137  		} else {
    138 @@ -899,6 +907,72 @@ getstate(Window w)
    139  	return result;
    140  }
    141  
    142 +#define STB_IMAGE_RESIZE_IMPLEMENTATION
    143 +#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_BOX
    144 +#include "stb_image_resize.h"
    145 +
    146 +XImage *
    147 +geticonprop(Window win)
    148 +{
    149 +	/* Return a new XImage (at most ICONSIZE x ICONSIZE) scaled from win's _NET_WM_ICON
    150 +	 * property, or NULL when there is no usable icon; freeicon() destroys the image. */
    151 +	int format, icw, ich;
    152 +	unsigned long n, extra, *p = NULL, *bstp = NULL, w, h;
    153 +	unsigned char *icbuf;
    154 +	XImage *img;
    155 +	Atom real;
    156 +
    157 +	if (XGetWindowProperty(dpy, win, netatom[NetWMIcon], 0L, LONG_MAX, False, AnyPropertyType,
    158 +						   &real, &format, &n, &extra, (unsigned char **)&p) != Success)
    159 +		return NULL;
    160 +	/* _NET_WM_ICON is a list of 32-bit CARDINALs: width, height, then width*height ARGB pixels per icon */
    161 +	if (!p || format != 32 || n < 2) goto fail;
    162 +
    163 +	{
    164 +		const unsigned long *end = p + n;
    165 +		unsigned long *i, m, d, bstd = ULONG_MAX;
    166 +		int big = 0; /* nonzero once an icon of at least ICONSIZE has been picked */
    167 +		for (i = p; end - i >= 2; i += w * h) {
    168 +			w = *i++; h = *i++;
    169 +			/* stop at empty, absurdly large or truncated entries rather than read past the property */
    170 +			if (!w || !h || w >= 16384 || h >= 16384 || w * h > (unsigned long)(end - i)) break;
    171 +			m = w > h ? w : h;
    172 +			d = m >= ICONSIZE ? m - ICONSIZE : ICONSIZE - m;
    173 +			/* an icon >= ICONSIZE beats every smaller one; otherwise the closest size wins */
    174 +			if (m >= ICONSIZE ? (!big || d < bstd) : (!big && d < bstd)) { big = m >= ICONSIZE; bstd = d; bstp = i; }
    175 +		}
    176 +	}
    177 +	if (!bstp) goto fail;
    178 +	w = *(bstp - 2); h = *(bstp - 1);
    179 +
    180 +	/* scale the longer side to ICONSIZE and keep the aspect ratio (never below 1 pixel) */
    181 +	if (w <= h) {
    182 +		ich = ICONSIZE; icw = w * ICONSIZE / h;
    183 +		if (icw < 1) icw = 1;
    184 +	} else {
    185 +		icw = ICONSIZE; ich = h * ICONSIZE / w;
    186 +		if (ich < 1) ich = 1;
    187 +	}
    188 +
    189 +	if (!(icbuf = malloc(icw * ich << 2))) goto fail;
    190 +#if ULONG_MAX > UINT32_MAX
    191 +	{ /* longs are wider than 32 bits here: pack the pixels into 32-bit words in place */
    192 +		unsigned long k, sz = w * h;
    193 +		uint32_t *bstp32 = (uint32_t *)bstp;
    194 +		for (k = 0; k < sz; ++k) bstp32[k] = bstp[k];
    195 +	}
    196 +#endif
    197 +	if (w == icw && h == ich) memcpy(icbuf, bstp, icw * ich << 2);
    198 +	else if (!stbir_resize_uint8((unsigned char *)bstp, w, h, 0, icbuf, icw, ich, 0, 4)) { free(icbuf); goto fail; }
    199 +	XFree(p);
    200 +	img = XCreateImage(dpy, DefaultVisual(dpy, screen), DefaultDepth(dpy, screen), ZPixmap, 0, (char *)icbuf, icw, ich, 32, 0);
    201 +	if (!img) free(icbuf); /* on failure the pixel buffer is still ours to release */
    202 +	return img;
    203 +fail:
    204 +	if (p) XFree(p);
    205 +	return NULL;
    206 +}
    207 +
    208  int
    209  gettextprop(Window w, Atom atom, char *text, unsigned int size)
    210  {
    211 @@ -1030,6 +1104,8 @@ manage(Window w, XWindowAttributes *wa)
    212  	c->h = c->oldh = wa->height;
    213  	c->oldbw = wa->border_width;
    214  
    215 +	c->icon = NULL;
    216 +	updateicon(c);
    217  	updatetitle(c);
    218  	if (XGetTransientForHint(dpy, w, &trans) && (t = wintoclient(trans))) {
    219  		c->mon = t->mon;
    220 @@ -1235,11 +1311,18 @@ propertynotify(XEvent *e)
    221  			drawbars();
    222  			break;
    223  		}
    224 +		int ub = 0, rdb = c == c->mon->sel;
    225  		if (ev->atom == XA_WM_NAME || ev->atom == netatom[NetWMName]) {
    226  			updatetitle(c);
    227 -			if (c == c->mon->sel)
    228 -				drawbar(c->mon);
    229 +			ub = rdb;
    230  		}
    231 +		if (ev->atom == netatom[NetWMIcon]) {
    232 +			updateicon(c);
    233 +			ub = rdb;
    234 +		}
    235 +
    236 +		if (ub) drawbar(c->mon);
    237 +
    238  		if (ev->atom == netatom[NetWMWindowType])
    239  			updatewindowtype(c);
    240  	}
    241 @@ -1556,6 +1639,7 @@ setup(void)
    242  	netatom[NetActiveWindow] = XInternAtom(dpy, "_NET_ACTIVE_WINDOW", False);
    243  	netatom[NetSupported] = XInternAtom(dpy, "_NET_SUPPORTED", False);
    244  	netatom[NetWMName] = XInternAtom(dpy, "_NET_WM_NAME", False);
    245 +	netatom[NetWMIcon] = XInternAtom(dpy, "_NET_WM_ICON", False);
    246  	netatom[NetWMState] = XInternAtom(dpy, "_NET_WM_STATE", False);
    247  	netatom[NetWMCheck] = XInternAtom(dpy, "_NET_SUPPORTING_WM_CHECK", False);
    248  	netatom[NetWMFullscreen] = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False);
    249 @@ -1746,6 +1830,15 @@ toggleview(const Arg *arg)
    250  	}
    251  }
    252  
    253 +void /* release the client's cached icon image, if it has one */
    254 +freeicon(Client *c)
    255 +{
    256 +	if (!c->icon)
    257 +		return; /* nothing cached for this client */
    258 +	XDestroyImage(c->icon);
    259 +	c->icon = NULL; /* leave the slot empty so a later call returns early */
    260 +}
    261 +
    262  void
    263  unfocus(Client *c, int setfocus)
    264  {
    265 @@ -1767,6 +1860,7 @@ unmanage(Client *c, int destroyed)
    266  
    267  	detach(c);
    268  	detachstack(c);
    269 +	freeicon(c);
    270  	if (!destroyed) {
    271  		wc.border_width = c->oldbw;
    272  		XGrabServer(dpy); /* avoid race conditions */
    273 @@ -2001,6 +2095,13 @@ updatetitle(Client *c)
    274  		strcpy(c->name, broken);
    275  }
    276  
    277 +void /* re-read the client's icon from its window, replacing any previously cached image */
    278 +updateicon(Client *c)
    279 +{
    280 +	freeicon(c); /* destroy the old image first so it is not leaked */
    281 +	c->icon = geticonprop(c->win); /* geticonprop() may return NULL; c->icon is then NULL */
    282 +}
    283 +
    284  void
    285  updatewindowtype(Client *c)
    286  {
    287 diff --git a/stb_image_resize.h b/stb_image_resize.h
    288 new file mode 100644
    289 index 0000000..42a8efb
    290 --- /dev/null
    291 +++ b/stb_image_resize.h
    292 @@ -0,0 +1,2631 @@
    293 +/* stb_image_resize - v0.96 - public domain image resizing
    294 +   by Jorge L Rodriguez (@VinoBS) - 2014
    295 +   http://github.com/nothings/stb
    296 +
    297 +   Written with emphasis on usability, portability, and efficiency. (No
    298 +   SIMD or threads, so it can be easily outperformed by libs that use those.)
    299 +   Only scaling and translation is supported, no rotations or shears.
    300 +   Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation.
    301 +
    302 +   COMPILING & LINKING
    303 +      In one C/C++ file that #includes this file, do this:
    304 +         #define STB_IMAGE_RESIZE_IMPLEMENTATION
    305 +      before the #include. That will create the implementation in that file.
    306 +
    307 +   QUICKSTART
    308 +      stbir_resize_uint8(      input_pixels , in_w , in_h , 0,
    309 +                               output_pixels, out_w, out_h, 0, num_channels)
    310 +      stbir_resize_float(...)
    311 +      stbir_resize_uint8_srgb( input_pixels , in_w , in_h , 0,
    312 +                               output_pixels, out_w, out_h, 0,
    313 +                               num_channels , alpha_chan  , 0)
    314 +      stbir_resize_uint8_srgb_edgemode(
    315 +                               input_pixels , in_w , in_h , 0,
    316 +                               output_pixels, out_w, out_h, 0,
    317 +                               num_channels , alpha_chan  , 0, STBIR_EDGE_CLAMP)
    318 +                                                            // WRAP/REFLECT/ZERO
    319 +
    320 +   FULL API
    321 +      See the "header file" section of the source for API documentation.
    322 +
    323 +   ADDITIONAL DOCUMENTATION
    324 +
    325 +      SRGB & FLOATING POINT REPRESENTATION
    326 +         The sRGB functions presume IEEE floating point. If you do not have
    327 +         IEEE floating point, define STBIR_NON_IEEE_FLOAT. This will use
    328 +         a slower implementation.
    329 +
    330 +      MEMORY ALLOCATION
    331 +         The resize functions here perform a single memory allocation using
    332 +         malloc. To control the memory allocation, before the #include that
    333 +         triggers the implementation, do:
    334 +
    335 +            #define STBIR_MALLOC(size,context) ...
    336 +            #define STBIR_FREE(ptr,context)   ...
    337 +
    338 +         Each resize function makes exactly one call to malloc/free, so to use
    339 +         temp memory, store the temp memory in the context and return that.
    340 +
    341 +      ASSERT
    342 +         Define STBIR_ASSERT(boolval) to override assert() and not use assert.h
    343 +
    344 +      OPTIMIZATION
    345 +         Define STBIR_SATURATE_INT to compute clamp values in-range using
    346 +         integer operations instead of float operations. This may be faster
    347 +         on some platforms.
    348 +
    349 +      DEFAULT FILTERS
    350 +         For functions which don't provide explicit control over what filters
    351 +         to use, you can change the compile-time defaults with
    352 +
    353 +            #define STBIR_DEFAULT_FILTER_UPSAMPLE     STBIR_FILTER_something
    354 +            #define STBIR_DEFAULT_FILTER_DOWNSAMPLE   STBIR_FILTER_something
    355 +
    356 +         See stbir_filter in the header-file section for the list of filters.
    357 +
    358 +      NEW FILTERS
    359 +         A number of 1D filter kernels are used. For a list of
    360 +         supported filters see the stbir_filter enum. To add a new filter,
    361 +         write a filter function and add it to stbir__filter_info_table.
    362 +
    363 +      PROGRESS
    364 +         For interactive use with slow resize operations, you can install
    365 +         a progress-report callback:
    366 +
    367 +            #define STBIR_PROGRESS_REPORT(val)   some_func(val)
    368 +
    369 +         The parameter val is a float which goes from 0 to 1 as progress is made.
    370 +
    371 +         For example:
    372 +
    373 +            static void my_progress_report(float progress);
    374 +            #define STBIR_PROGRESS_REPORT(val) my_progress_report(val)
    375 +
    376 +            #define STB_IMAGE_RESIZE_IMPLEMENTATION
    377 +            #include "stb_image_resize.h"
    378 +
    379 +            static void my_progress_report(float progress)
    380 +            {
    381 +               printf("Progress: %f%%\n", progress*100);
    382 +            }
    383 +
    384 +      MAX CHANNELS
    385 +         If your image has more than 64 channels, define STBIR_MAX_CHANNELS
    386 +         to the max you'll have.
    387 +
    388 +      ALPHA CHANNEL
    389 +         Most of the resizing functions provide the ability to control how
    390 +         the alpha channel of an image is processed. The important things
    391 +         to know about this:
    392 +
    393 +         1. The best mathematically-behaved version of alpha to use is
    394 +         called "premultiplied alpha", in which the other color channels
    395 +         have had the alpha value multiplied in. If you use premultiplied
    396 +         alpha, linear filtering (such as image resampling done by this
    397 +         library, or performed in texture units on GPUs) does the "right
    398 +         thing". While premultiplied alpha is standard in the movie CGI
    399 +         industry, it is still uncommon in the videogame/real-time world.
    400 +
    401 +         If you linearly filter non-premultiplied alpha, strange effects
    402 +         occur. (For example, the 50/50 average of 99% transparent bright green
    403 +         and 1% transparent black produces 50% transparent dark green when
    404 +         non-premultiplied, whereas premultiplied it produces 50%
    405 +         transparent near-black. The former introduces green energy
    406 +         that doesn't exist in the source image.)
    407 +
    408 +         2. Artists should not edit premultiplied-alpha images; artists
    409 +         want non-premultiplied alpha images. Thus, art tools generally output
    410 +         non-premultiplied alpha images.
    411 +
    412 +         3. You will get best results in most cases by converting images
    413 +         to premultiplied alpha before processing them mathematically.
    414 +
    415 +         4. If you pass the flag STBIR_FLAG_ALPHA_PREMULTIPLIED, the
    416 +         resizer does not do anything special for the alpha channel;
    417 +         it is resampled identically to other channels. This produces
    418 +         the correct results for premultiplied-alpha images, but produces
    419 +         less-than-ideal results for non-premultiplied-alpha images.
    420 +
    421 +         5. If you do not pass the flag STBIR_FLAG_ALPHA_PREMULTIPLIED,
    422 +         then the resizer weights the contribution of input pixels
    423 +         based on their alpha values, or, equivalently, it multiplies
    424 +         the alpha value into the color channels, resamples, then divides
    425 +         by the resultant alpha value. Input pixels which have alpha=0 do
    426 +         not contribute at all to output pixels unless _all_ of the input
    427 +         pixels affecting that output pixel have alpha=0, in which case
    428 +         the result for that pixel is the same as it would be without
    429 +         STBIR_FLAG_ALPHA_PREMULTIPLIED. However, this is only true for
    430 +         input images in integer formats. For input images in float format,
    431 +         input pixels with alpha=0 have no effect, and output pixels
    432 +         which have alpha=0 will be 0 in all channels. (For float images,
    433 +         you can manually achieve the same result by adding a tiny epsilon
    434 +         value to the alpha channel of every image, and then subtracting
    435 +         or clamping it at the end.)
    436 +
    437 +         6. You can suppress the behavior described in #5 and make
    438 +         all-0-alpha pixels have 0 in all channels by #defining
    439 +         STBIR_NO_ALPHA_EPSILON.
    440 +
    441 +         7. You can separately control whether the alpha channel is
    442 +         interpreted as linear or affected by the colorspace. By default
    443 +         it is linear; you almost never want to apply the colorspace.
    444 +         (For example, graphics hardware does not apply sRGB conversion
    445 +         to the alpha channel.)
    446 +
    447 +   CONTRIBUTORS
    448 +      Jorge L Rodriguez: Implementation
    449 +      Sean Barrett: API design, optimizations
    450 +      Aras Pranckevicius: bugfix
    451 +      Nathan Reed: warning fixes
    452 +
    453 +   REVISIONS
    454 +      0.97 (2020-02-02) fixed warning
    455 +      0.96 (2019-03-04) fixed warnings
    456 +      0.95 (2017-07-23) fixed warnings
    457 +      0.94 (2017-03-18) fixed warnings
    458 +      0.93 (2017-03-03) fixed bug with certain combinations of heights
    459 +      0.92 (2017-01-02) fix integer overflow on large (>2GB) images
    460 +      0.91 (2016-04-02) fix warnings; fix handling of subpixel regions
    461 +      0.90 (2014-09-17) first released version
    462 +
    463 +   LICENSE
    464 +     See end of file for license information.
    465 +
    466 +   TODO
    467 +      Don't decode all of the image data when only processing a partial tile
    468 +      Don't use full-width decode buffers when only processing a partial tile
    469 +      When processing wide images, break processing into tiles so data fits in L1 cache
    470 +      Installable filters?
    471 +      Resize that respects alpha test coverage
    472 +         (Reference code: FloatImage::alphaTestCoverage and FloatImage::scaleAlphaToCoverage:
    473 +         https://code.google.com/p/nvidia-texture-tools/source/browse/trunk/src/nvimage/FloatImage.cpp )
    474 +*/
    475 +
    476 +#ifndef STBIR_INCLUDE_STB_IMAGE_RESIZE_H
    477 +#define STBIR_INCLUDE_STB_IMAGE_RESIZE_H
    478 +
    479 +#ifdef _MSC_VER
    480 +typedef unsigned char  stbir_uint8;
    481 +typedef unsigned short stbir_uint16;
    482 +typedef unsigned int   stbir_uint32;
    483 +#else
    484 +#include <stdint.h>
    485 +typedef uint8_t  stbir_uint8;
    486 +typedef uint16_t stbir_uint16;
    487 +typedef uint32_t stbir_uint32;
    488 +#endif
    489 +
    490 +#ifndef STBIRDEF
    491 +#ifdef STB_IMAGE_RESIZE_STATIC
    492 +#define STBIRDEF static
    493 +#else
    494 +#ifdef __cplusplus
    495 +#define STBIRDEF extern "C"
    496 +#else
    497 +#define STBIRDEF extern
    498 +#endif
    499 +#endif
    500 +#endif
    501 +
    502 +//////////////////////////////////////////////////////////////////////////////
    503 +//
    504 +// Easy-to-use API:
    505 +//
    506 +//     * "input pixels" points to an array of image data with 'num_channels' channels (e.g. RGB=3, RGBA=4)
    507 +//     * input_w is input image width (x-axis), input_h is input image height (y-axis)
    508 +//     * stride is the offset between successive rows of image data in memory, in bytes. you can
    509 +//       specify 0 to mean packed continuously in memory
    510 +//     * alpha channel is treated identically to other channels.
    511 +//     * colorspace is linear or sRGB as specified by function name
    512 +//     * returned result is 1 for success or 0 in case of an error.
    513 +//       #define STBIR_ASSERT() to trigger an assert on parameter validation errors.
    514 +//     * Memory required grows approximately linearly with input and output size, but with
    515 +//       discontinuities at input_w == output_w and input_h == output_h.
    516 +//     * These functions use a "default" resampling filter defined at compile time. To change the filter,
    517 +//       you can change the compile-time defaults by #defining STBIR_DEFAULT_FILTER_UPSAMPLE
    518 +//       and STBIR_DEFAULT_FILTER_DOWNSAMPLE, or you can use the medium-complexity API.
    519 +
    520 +STBIRDEF int stbir_resize_uint8(     const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
    521 +                                           unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
    522 +                                     int num_channels);
    523 +
    524 +STBIRDEF int stbir_resize_float(     const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
    525 +                                           float *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
    526 +                                     int num_channels);
    527 +
    528 +
    529 +// The following functions interpret image data as gamma-corrected sRGB.
    530 +// Specify STBIR_ALPHA_CHANNEL_NONE if you have no alpha channel,
    531 +// or otherwise provide the index of the alpha channel. Flags value
    532 +// of 0 will probably do the right thing if you're not sure what
    533 +// the flags mean.
    534 +
    535 +#define STBIR_ALPHA_CHANNEL_NONE       -1
    536 +
    537 +// Set this flag if your texture has premultiplied alpha. Otherwise, stbir will
    538 +// use alpha-weighted resampling (effectively premultiplying, resampling,
    539 +// then unpremultiplying).
    540 +#define STBIR_FLAG_ALPHA_PREMULTIPLIED    (1 << 0)
    541 +// The specified alpha channel should be handled as gamma-corrected value even
    542 +// when doing sRGB operations.
    543 +#define STBIR_FLAG_ALPHA_USES_COLORSPACE  (1 << 1)
    544 +
    545 +STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
    546 +                                           unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
    547 +                                     int num_channels, int alpha_channel, int flags);
    548 +
    549 +
    550 +typedef enum // how requests to sample off the edge of the image are handled; values start at 1
    551 +{
    552 +    STBIR_EDGE_CLAMP   = 1,
    553 +    STBIR_EDGE_REFLECT = 2,
    554 +    STBIR_EDGE_WRAP    = 3,
    555 +    STBIR_EDGE_ZERO    = 4,
    556 +} stbir_edge;
    557 +
    558 +// This function adds the ability to specify how requests to sample off the edge of the image are handled.
    559 +STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
    560 +                                                    unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
    561 +                                              int num_channels, int alpha_channel, int flags,
    562 +                                              stbir_edge edge_wrap_mode);
    563 +
    564 +//////////////////////////////////////////////////////////////////////////////
    565 +//
    566 +// Medium-complexity API
    567 +//
    568 +// This extends the easy-to-use API as follows:
    569 +//
    570 +//     * Alpha-channel can be processed separately
    571 +//       * If alpha_channel is not STBIR_ALPHA_CHANNEL_NONE
    572 +//         * Alpha channel will not be gamma corrected (unless flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)
    573 +//         * Filters will be weighted by alpha channel (unless flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)
    574 +//     * Filter can be selected explicitly
    575 +//     * uint16 image type
    576 +//     * sRGB colorspace available for all types
    577 +//     * context parameter for passing to STBIR_MALLOC
    578 +
    579 +typedef enum
    580 +{
    581 +    STBIR_FILTER_DEFAULT      = 0,  // use same filter type that easy-to-use API chooses
    582 +    STBIR_FILTER_BOX          = 1,  // A trapezoid w/1-pixel wide ramps, same result as box for integer scale ratios
    583 +    STBIR_FILTER_TRIANGLE     = 2,  // On upsampling, produces same results as bilinear texture filtering
    584 +    STBIR_FILTER_CUBICBSPLINE = 3,  // The cubic b-spline (aka Mitchell-Netravali with B=1,C=0), gaussian-esque
    585 +    STBIR_FILTER_CATMULLROM   = 4,  // An interpolating cubic spline
    586 +    STBIR_FILTER_MITCHELL     = 5,  // Mitchell-Netravali filter with B=1/3, C=1/3
    587 +} stbir_filter;
    588 +
    589 +typedef enum // colorspace selector taken by the medium- and full-complexity resize functions
    590 +{
    591 +    STBIR_COLORSPACE_LINEAR,
    592 +    STBIR_COLORSPACE_SRGB,
    593 +
    594 +    STBIR_MAX_COLORSPACES, // equals the number of colorspaces listed above
    595 +} stbir_colorspace;
    596 +
    597 +// The following functions are all identical except for the type of the image data
    598 +
    599 +STBIRDEF int stbir_resize_uint8_generic( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
    600 +                                               unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
    601 +                                         int num_channels, int alpha_channel, int flags,
    602 +                                         stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
    603 +                                         void *alloc_context);
    604 +
    605 +STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels  , int input_w , int input_h , int input_stride_in_bytes,
    606 +                                               stbir_uint16 *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
    607 +                                         int num_channels, int alpha_channel, int flags,
    608 +                                         stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
    609 +                                         void *alloc_context);
    610 +
    611 +STBIRDEF int stbir_resize_float_generic( const float *input_pixels         , int input_w , int input_h , int input_stride_in_bytes,
    612 +                                               float *output_pixels        , int output_w, int output_h, int output_stride_in_bytes,
    613 +                                         int num_channels, int alpha_channel, int flags,
    614 +                                         stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
    615 +                                         void *alloc_context);
    616 +
    617 +
    618 +
    619 +//////////////////////////////////////////////////////////////////////////////
    620 +//
    621 +// Full-complexity API
    622 +//
    623 +// This extends the medium API as follows:
    624 +//
    625 +//       * uint32 image type
    626 +//     * not typesafe
    627 +//     * separate filter types for each axis
    628 +//     * separate edge modes for each axis
    629 +//     * can specify scale explicitly for subpixel correctness
    630 +//     * can specify image source tile using texture coordinates
    631 +
    632 +typedef enum // pixel component type selector taken by the full-complexity resize functions
    633 +{
    634 +    STBIR_TYPE_UINT8 ,
    635 +    STBIR_TYPE_UINT16,
    636 +    STBIR_TYPE_UINT32,
    637 +    STBIR_TYPE_FLOAT ,
    638 +
    639 +    STBIR_MAX_TYPES // equals the number of datatypes listed above
    640 +} stbir_datatype;
    641 +
    642 +STBIRDEF int stbir_resize(         const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
    643 +                                         void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
    644 +                                   stbir_datatype datatype,
    645 +                                   int num_channels, int alpha_channel, int flags,
    646 +                                   stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
    647 +                                   stbir_filter filter_horizontal,  stbir_filter filter_vertical,
    648 +                                   stbir_colorspace space, void *alloc_context);
    649 +
    650 +STBIRDEF int stbir_resize_subpixel(const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // same leading parameters as stbir_resize
    651 +                                         void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
    652 +                                   stbir_datatype datatype,
    653 +                                   int num_channels, int alpha_channel, int flags,
    654 +                                   stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
    655 +                                   stbir_filter filter_horizontal,  stbir_filter filter_vertical,
    656 +                                   stbir_colorspace space, void *alloc_context,
    657 +                                   float x_scale, float y_scale,    // caller-supplied scale for each axis
    658 +                                   float x_offset, float y_offset); // caller-supplied offset for each axis
    659 +
    660 +STBIRDEF int stbir_resize_region(  const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // same leading parameters as stbir_resize
    661 +                                         void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
    662 +                                   stbir_datatype datatype,
    663 +                                   int num_channels, int alpha_channel, int flags,
    664 +                                   stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
    665 +                                   stbir_filter filter_horizontal,  stbir_filter filter_vertical,
    666 +                                   stbir_colorspace space, void *alloc_context,
    667 +                                   float s0, float t0, float s1, float t1);
    668 +// (s0, t0) and (s1, t1) are the top-left and bottom-right corners (uv addressing style: [0, 1]x[0, 1]) of the region of the input image to use.
    669 +
    670 +//
    671 +//
    672 +////   end header file   /////////////////////////////////////////////////////
    673 +#endif // STBIR_INCLUDE_STB_IMAGE_RESIZE_H
    674 +
    675 +
    676 +
    677 +
    678 +
    679 +#ifdef STB_IMAGE_RESIZE_IMPLEMENTATION
    680 +
    681 +#ifndef STBIR_ASSERT // assertion hook: define STBIR_ASSERT before this point to replace the default
    682 +#include <assert.h>
    683 +#define STBIR_ASSERT(x) assert(x)
    684 +#endif
    685 +
    686 +// For memset
    687 +#include <string.h>
    688 +
    689 +#include <math.h>
    690 +
    691 +#ifndef STBIR_MALLOC // allocator hooks; STBIR_FREE is only defaulted inside this guard, so an override must define both macros
    692 +#include <stdlib.h>
    693 +// use comma operator to evaluate c, to avoid "unused parameter" warnings
    694 +#define STBIR_MALLOC(size,c) ((void)(c), malloc(size))
    695 +#define STBIR_FREE(ptr,c)    ((void)(c), free(ptr))
    696 +#endif
    697 +
    698 +#ifndef _MSC_VER // stbir__inline: 'inline' for non-MSVC C++, empty for non-MSVC C, __forceinline for MSVC
    699 +#ifdef __cplusplus
    700 +#define stbir__inline inline
    701 +#else
    702 +#define stbir__inline
    703 +#endif
    704 +#else
    705 +#define stbir__inline __forceinline
    706 +#endif
    707 +
    708 +
    709 +// should produce compiler error if size is wrong: the array length is -1 unless sizeof(stbir_uint32) is exactly 4
    710 +typedef unsigned char stbir__validate_uint32[sizeof(stbir_uint32) == 4 ? 1 : -1];
    711 +
    712 +#ifdef _MSC_VER // STBIR__NOTUSED marks a value as deliberately unused
    713 +#define STBIR__NOTUSED(v)  (void)(v)
    714 +#else
    715 +#define STBIR__NOTUSED(v)  (void)sizeof(v) // sizeof form: mentions v without evaluating it (unless v is a VLA)
    716 +#endif
    717 +
    718 +#define STBIR__ARRAY_SIZE(a) (sizeof((a))/sizeof((a)[0])) // element count; only correct for real arrays, not pointers
    719 +
    720 +#ifndef STBIR_DEFAULT_FILTER_UPSAMPLE // overridable; presumably the filter used for upsampling when the caller picks none (usage is outside this chunk)
    721 +#define STBIR_DEFAULT_FILTER_UPSAMPLE    STBIR_FILTER_CATMULLROM
    722 +#endif
    723 +
    724 +#ifndef STBIR_DEFAULT_FILTER_DOWNSAMPLE // overridable; presumably the downsampling counterpart (usage is outside this chunk)
    725 +#define STBIR_DEFAULT_FILTER_DOWNSAMPLE  STBIR_FILTER_MITCHELL
    726 +#endif
    727 +
    728 +#ifndef STBIR_PROGRESS_REPORT // progress hook; the default below expands to nothing
    729 +#define STBIR_PROGRESS_REPORT(float_0_to_1)
    730 +#endif
    731 +
    732 +#ifndef STBIR_MAX_CHANNELS // overridable channel limit, default 64
    733 +#define STBIR_MAX_CHANNELS 64
    734 +#endif
    735 +
    736 +#if STBIR_MAX_CHANNELS > 65536 // reject overrides that exceed the limit stated below
    737 +#error "Too many channels; STBIR_MAX_CHANNELS must be no more than 65536."
    738 +// because we store the indices in 16-bit variables
    739 +#endif
    740 +
    741 +// This value is added to alpha just before premultiplication to avoid
    742 +// zeroing out color values. It is equivalent to 2^-80. If you don't want
    743 +// that behavior (it may interfere if you have floating point images with
    744 +// very small alpha values) then you can define STBIR_NO_ALPHA_EPSILON to
    745 +// disable it. The guard below only lets you override the epsilon's value.
    746 +#ifndef STBIR_ALPHA_EPSILON
    747 +#define STBIR_ALPHA_EPSILON ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20)) // 1 / (2^20)^4 == 2^-80
    748 +#endif
    749 +
    750 +
    751 +
    752 +#ifdef _MSC_VER // STBIR__UNUSED_PARAM: same two expansions as STBIR__NOTUSED above
    753 +#define STBIR__UNUSED_PARAM(v)  (void)(v)
    754 +#else
    755 +#define STBIR__UNUSED_PARAM(v)  (void)sizeof(v)
    756 +#endif
    757 +
    758 +// must match stbir_datatype: one entry per enumerator, in declaration order
    759 +static unsigned char stbir__type_size[] = { // size in bytes of one component of each type
    760 +    1, // STBIR_TYPE_UINT8
    761 +    2, // STBIR_TYPE_UINT16
    762 +    4, // STBIR_TYPE_UINT32
    763 +    4, // STBIR_TYPE_FLOAT
    764 +};
    765 +
    766 +// Kernel function centered at 0: takes a sample offset x and a scale, returns the filter weight at x
    767 +typedef float (stbir__kernel_fn)(float x, float scale);
    768 +typedef float (stbir__support_fn)(float scale); // filter support for a given scale; the width helpers below multiply it by 2
    769 +
    770 +typedef struct // one filter: its weight function paired with its support function
    771 +{
    772 +    stbir__kernel_fn* kernel;   // weight function; NULL in entry 0 of stbir__filter_info_table
    773 +    stbir__support_fn* support; // support as a function of scale
    774 +} stbir__filter_info;
    775 +
    776 +// When upsampling, the contributors are which source pixels contribute.
    777 +// When downsampling, the contributors are which destination pixels are contributed to.
    778 +typedef struct // inclusive pixel range [n0, n1]
    779 +{
    780 +    int n0; // First contributing pixel
    781 +    int n1; // Last contributing pixel
    782 +} stbir__contributors;
    783 +
    784 +typedef struct // bundles the input/output image descriptions, sampling parameters and working-buffer pointers
    785 +{
    786 +    const void* input_data;
    787 +    int input_w;
    788 +    int input_h;
    789 +    int input_stride_bytes;
    790 +
    791 +    void* output_data;
    792 +    int output_w;
    793 +    int output_h;
    794 +    int output_stride_bytes;
    795 +
    796 +    float s0, t0, s1, t1; // presumably the input region corners described for stbir_resize_region -- confirm
    797 +
    798 +    float horizontal_shift; // Units: output pixels
    799 +    float vertical_shift;   // Units: output pixels
    800 +    float horizontal_scale; // > 1 means upsampling along x (see stbir__use_width_upsampling)
    801 +    float vertical_scale;   // > 1 means upsampling along y (see stbir__use_height_upsampling)
    802 +
    803 +    int channels;
    804 +    int alpha_channel;
    805 +    stbir_uint32 flags;
    806 +    stbir_datatype type;
    807 +    stbir_filter horizontal_filter;
    808 +    stbir_filter vertical_filter;
    809 +    stbir_edge edge_horizontal;
    810 +    stbir_edge edge_vertical;
    811 +    stbir_colorspace colorspace;
    812 +
    813 +    stbir__contributors* horizontal_contributors;
    814 +    float* horizontal_coefficients;
    815 +
    816 +    stbir__contributors* vertical_contributors;
    817 +    float* vertical_coefficients;
    818 +
    819 +    int decode_buffer_pixels;
    820 +    float* decode_buffer;
    821 +
    822 +    float* horizontal_buffer;
    823 +
    824 +    // cache these because ceil/floor are inexplicably showing up in profile
    825 +    int horizontal_coefficient_width;
    826 +    int vertical_coefficient_width;
    827 +    int horizontal_filter_pixel_width;
    828 +    int vertical_filter_pixel_width;
    829 +    int horizontal_filter_pixel_margin;
    830 +    int vertical_filter_pixel_margin;
    831 +    int horizontal_num_contributors; // multiplied by the coefficient width in stbir__get_total_horizontal_coefficients
    832 +    int vertical_num_contributors;   // multiplied by the coefficient width in stbir__get_total_vertical_coefficients
    833 +
    834 +    int ring_buffer_length_bytes;   // The length of an individual entry in the ring buffer. The total number of ring buffers is stbir__get_filter_pixel_width(filter)
    835 +    int ring_buffer_num_entries;    // Total number of entries in the ring buffer.
    836 +    int ring_buffer_first_scanline;
    837 +    int ring_buffer_last_scanline;
    838 +    int ring_buffer_begin_index;    // first_scanline is at this index in the ring buffer
    839 +    float* ring_buffer;
    840 +
    841 +    float* encode_buffer; // A temporary buffer to store floats so we don't lose precision while we do multiply-adds.
    842 +
    843 +    int horizontal_contributors_size; // the *_size fields are presumably the sizes of the matching buffers above -- units not shown here, confirm
    844 +    int horizontal_coefficients_size;
    845 +    int vertical_contributors_size;
    846 +    int vertical_coefficients_size;
    847 +    int decode_buffer_size;
    848 +    int horizontal_buffer_size;
    849 +    int ring_buffer_size;
    850 +    int encode_buffer_size;
    851 +} stbir__info;
    852 +
    853 +
    854 +static const float stbir__max_uint8_as_float  = 255.0f; // 2^8 - 1
    855 +static const float stbir__max_uint16_as_float = 65535.0f; // 2^16 - 1
    856 +static const double stbir__max_uint32_as_float = 4294967295.0; // 2^32 - 1; declared double despite the "_as_float" name
    857 +
    858 +
    859 +static stbir__inline int stbir__min(int a, int b) // returns the smaller of a and b
    860 +{
    861 +    return a < b ? a : b;
    862 +}
    863 +
    864 +static stbir__inline float stbir__saturate(float x) // clamps x to [0, 1]
    865 +{
    866 +    if (x < 0)
    867 +        return 0;
    868 +
    869 +    if (x > 1)
    870 +        return 1;
    871 +
    872 +    return x; // also reached for NaN, since both comparisons above are false for it
    873 +}
    874 +
    875 +#ifdef STBIR_SATURATE_INT
    876 +static stbir__inline stbir_uint8 stbir__saturate8(int x) // clamps x to [0, 255]
    877 +{
    878 +    if ((unsigned int) x <= 255) // one unsigned compare accepts 0..255; a negative x converts to a huge value and fails
    879 +        return x;
    880 +
    881 +    if (x < 0)
    882 +        return 0;
    883 +
    884 +    return 255;
    885 +}
    886 +
    887 +static stbir__inline stbir_uint16 stbir__saturate16(int x) // clamps x to [0, 65535]
    888 +{
    889 +    if ((unsigned int) x <= 65535) // one unsigned compare accepts 0..65535; a negative x converts to a huge value and fails
    890 +        return x;
    891 +
    892 +    if (x < 0)
    893 +        return 0;
    894 +
    895 +    return 65535;
    896 +}
    897 +#endif
    898 +
    899 +static float stbir__srgb_uchar_to_linear_float[256] = { // lookup table: 8-bit sRGB value -> linear float, rising from 0 to 1
    900 +    0.000000f, 0.000304f, 0.000607f, 0.000911f, 0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f,
    901 +    0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f, 0.006049f, 0.006512f, 0.006995f, 0.007499f,
    902 +    0.008023f, 0.008568f, 0.009134f, 0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f, 0.014444f,
    903 +    0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f, 0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f,
    904 +    0.025187f, 0.026241f, 0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f, 0.035601f, 0.036889f,
    905 +    0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f, 0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f,
    906 +    0.054480f, 0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f, 0.068478f, 0.070360f, 0.072272f,
    907 +    0.074214f, 0.076185f, 0.078187f, 0.080220f, 0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f,
    908 +    0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f, 0.114435f, 0.116971f, 0.119538f, 0.122139f,
    909 +    0.124772f, 0.127438f, 0.130136f, 0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f, 0.152926f,
    910 +    0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f, 0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f,
    911 +    0.191202f, 0.194618f, 0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f, 0.223228f, 0.226966f,
    912 +    0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f, 0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f,
    913 +    0.274677f, 0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f, 0.309469f, 0.313989f, 0.318547f,
    914 +    0.323143f, 0.327778f, 0.332452f, 0.337164f, 0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f,
    915 +    0.376262f, 0.381326f, 0.386430f, 0.391573f, 0.396755f, 0.401978f, 0.407240f, 0.412543f, 0.417885f, 0.423268f, 0.428691f,
    916 +    0.434154f, 0.439657f, 0.445201f, 0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473532f, 0.479320f, 0.485150f, 0.491021f,
    917 +    0.496933f, 0.502887f, 0.508881f, 0.514918f, 0.520996f, 0.527115f, 0.533276f, 0.539480f, 0.545725f, 0.552011f, 0.558340f,
    918 +    0.564712f, 0.571125f, 0.577581f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f, 0.623960f, 0.630757f,
    919 +    0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f, 0.672443f, 0.679543f, 0.686685f, 0.693872f, 0.701102f, 0.708376f,
    920 +    0.715694f, 0.723055f, 0.730461f, 0.737911f, 0.745404f, 0.752942f, 0.760525f, 0.768151f, 0.775822f, 0.783538f, 0.791298f,
    921 +    0.799103f, 0.806952f, 0.814847f, 0.822786f, 0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f,
    922 +    0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f, 0.947307f, 0.955974f, 0.964686f, 0.973445f,
    923 +    0.982251f, 0.991102f, 1.0f
    924 +};
    925 +
    926 +static float stbir__srgb_to_linear(float f) // sRGB-encoded value -> linear value, piecewise: linear segment then power curve
    927 +{
    928 +    if (f <= 0.04045f)
    929 +        return f / 12.92f; // linear segment for small inputs
    930 +    else
    931 +        return (float)pow((f + 0.055f) / 1.055f, 2.4f); // power segment, exponent 2.4
    932 +}
    933 +
    934 +static float stbir__linear_to_srgb(float f) // linear value -> sRGB-encoded value; inverse form of stbir__srgb_to_linear
    935 +{
    936 +    if (f <= 0.0031308f)
    937 +        return f * 12.92f; // linear segment for small inputs
    938 +    else
    939 +        return 1.055f * (float)pow(f, 1 / 2.4f) - 0.055f; // power segment, exponent 1/2.4
    940 +}
    941 +
    942 +#ifndef STBIR_NON_IEEE_FLOAT
    943 +// From https://gist.github.com/rygorous/2203834
    944 +
    945 +typedef union // gives access to a float's bit pattern as a 32-bit unsigned integer (only compiled when STBIR_NON_IEEE_FLOAT is undefined)
    946 +{
    947 +    stbir_uint32 u;
    948 +    float f;
    949 +} stbir__FP32;
    950 +
    951 +static const stbir_uint32 fp32_to_srgb8_tab4[104] = { // each entry packs a bias (high 16 bits) and a scale (low 16 bits), as unpacked in stbir__linear_to_srgb_uchar
    952 +    0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
    953 +    0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
    954 +    0x010e0033, 0x01280033, 0x01410033, 0x015b0033, 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
    955 +    0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
    956 +    0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
    957 +    0x06970158, 0x07420142, 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
    958 +    0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
    959 +    0x11070264, 0x1238023e, 0x1357021d, 0x14660201, 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
    960 +    0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
    961 +    0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, 0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,
    962 +    0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
    963 +    0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
    964 +    0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
    965 +};
    966 +
    967 +static stbir_uint8 stbir__linear_to_srgb_uchar(float in) // linear float -> 8-bit sRGB via table lookup plus linear interpolation on the float's bits
    968 +{
    969 +    static const stbir__FP32 almostone = { 0x3f7fffff }; // 1-eps
    970 +    static const stbir__FP32 minval = { (127-13) << 23 }; // bit pattern of 2^-13: biased exponent 127-13, zero mantissa
    971 +    stbir_uint32 tab,bias,scale,t;
    972 +    stbir__FP32 f;
    973 +
    974 +    // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
    975 +    // The tests are carefully written so that NaNs map to 0, same as in the reference
    976 +    // implementation.
    977 +    if (!(in > minval.f)) // written this way to catch NaNs
    978 +        in = minval.f;
    979 +    if (in > almostone.f)
    980 +        in = almostone.f;
    981 +
    982 +    // Do the table lookup and unpack bias, scale
    983 +    f.f = in;
    984 +    tab = fp32_to_srgb8_tab4[(f.u - minval.u) >> 20]; // index = exponent offset * 8 + top 3 mantissa bits; at most 12*8+7 = 103 after the clamp
    985 +    bias = (tab >> 16) << 9;
    986 +    scale = tab & 0xffff;
    987 +
    988 +    // Grab next-highest mantissa bits and perform linear interpolation
    989 +    t = (f.u >> 12) & 0xff;
    990 +    return (unsigned char) ((bias + scale*t) >> 16);
    991 +}
    992 +
    993 +#else
    994 +// sRGB transition values, scaled by 1<<28; increasing, one threshold per 8-bit output value
    995 +static int stbir__srgb_offset_to_linear_scaled[256] =
    996 +{
    997 +            0,     40738,    122216,    203693,    285170,    366648,    448125,    529603,
    998 +       611080,    692557,    774035,    855852,    942009,   1033024,   1128971,   1229926,
    999 +      1335959,   1447142,   1563542,   1685229,   1812268,   1944725,   2082664,   2226148,
   1000 +      2375238,   2529996,   2690481,   2856753,   3028870,   3206888,   3390865,   3580856,
   1001 +      3776916,   3979100,   4187460,   4402049,   4622919,   4850123,   5083710,   5323731,
   1002 +      5570236,   5823273,   6082892,   6349140,   6622065,   6901714,   7188133,   7481369,
   1003 +      7781466,   8088471,   8402427,   8723380,   9051372,   9386448,   9728650,  10078021,
   1004 +     10434603,  10798439,  11169569,  11548036,  11933879,  12327139,  12727857,  13136073,
   1005 +     13551826,  13975156,  14406100,  14844697,  15290987,  15745007,  16206795,  16676389,
   1006 +     17153826,  17639142,  18132374,  18633560,  19142734,  19659934,  20185196,  20718552,
   1007 +     21260042,  21809696,  22367554,  22933648,  23508010,  24090680,  24681686,  25281066,
   1008 +     25888850,  26505076,  27129772,  27762974,  28404716,  29055026,  29713942,  30381490,
   1009 +     31057708,  31742624,  32436272,  33138682,  33849884,  34569912,  35298800,  36036568,
   1010 +     36783260,  37538896,  38303512,  39077136,  39859796,  40651528,  41452360,  42262316,
   1011 +     43081432,  43909732,  44747252,  45594016,  46450052,  47315392,  48190064,  49074096,
   1012 +     49967516,  50870356,  51782636,  52704392,  53635648,  54576432,  55526772,  56486700,
   1013 +     57456236,  58435408,  59424248,  60422780,  61431036,  62449032,  63476804,  64514376,
   1014 +     65561776,  66619028,  67686160,  68763192,  69850160,  70947088,  72053992,  73170912,
   1015 +     74297864,  75434880,  76581976,  77739184,  78906536,  80084040,  81271736,  82469648,
   1016 +     83677792,  84896192,  86124888,  87363888,  88613232,  89872928,  91143016,  92423512,
   1017 +     93714432,  95015816,  96327688,  97650056,  98982952, 100326408, 101680440, 103045072,
   1018 +    104420320, 105806224, 107202800, 108610064, 110028048, 111456776, 112896264, 114346544,
   1019 +    115807632, 117279552, 118762328, 120255976, 121760536, 123276016, 124802440, 126339832,
   1020 +    127888216, 129447616, 131018048, 132599544, 134192112, 135795792, 137410592, 139036528,
   1021 +    140673648, 142321952, 143981456, 145652208, 147334208, 149027488, 150732064, 152447968,
   1022 +    154175200, 155913792, 157663776, 159425168, 161197984, 162982240, 164777968, 166585184,
   1023 +    168403904, 170234160, 172075968, 173929344, 175794320, 177670896, 179559120, 181458992,
   1024 +    183370528, 185293776, 187228736, 189175424, 191133888, 193104112, 195086128, 197079968,
   1025 +    199085648, 201103184, 203132592, 205173888, 207227120, 209292272, 211369392, 213458480,
   1026 +    215559568, 217672656, 219797792, 221934976, 224084240, 226245600, 228419056, 230604656,
   1027 +    232802400, 235012320, 237234432, 239468736, 241715280, 243974080, 246245120, 248528464,
   1028 +    250824112, 253132064, 255452368, 257785040, 260130080, 262487520, 264857376, 267239664,
   1029 +};
   1030 +
   1031 +static stbir_uint8 stbir__linear_to_srgb_uchar(float f) // STBIR_NON_IEEE_FLOAT variant: linear float -> 8-bit sRGB by searching the threshold table
   1032 +{
   1033 +    int x = (int) (f * (1 << 28)); // has headroom so you don't need to clamp
   1034 +    int v = 0;
   1035 +    int i;
   1036 +
   1037 +    // Refine the guess with a short binary search: eight halving steps leave v as the largest index whose threshold is <= x (0 if none).
   1038 +    i = v + 128; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
   1039 +    i = v +  64; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
   1040 +    i = v +  32; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
   1041 +    i = v +  16; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
   1042 +    i = v +   8; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
   1043 +    i = v +   4; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
   1044 +    i = v +   2; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
   1045 +    i = v +   1; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; // i never exceeds 128+64+32+16+8+4+2+1 = 255
   1046 +
   1047 +    return (stbir_uint8) v;
   1048 +}
   1049 +#endif
   1050 +
   1051 +static float stbir__filter_trapezoid(float x, float scale) // trapezoid kernel: flat top of height 1 with linear ramps of width 'scale' on each side
   1052 +{
   1053 +    float halfscale = scale / 2;
   1054 +    float t = 0.5f + halfscale; // outer edge: weight is 0 at and beyond this distance
   1055 +    STBIR_ASSERT(scale <= 1);
   1056 +
   1057 +    x = (float)fabs(x); // the kernel is symmetric about 0
   1058 +
   1059 +    if (x >= t)
   1060 +        return 0;
   1061 +    else
   1062 +    {
   1063 +        float r = 0.5f - halfscale; // inner edge: weight is 1 up to this distance
   1064 +        if (x <= r)
   1065 +            return 1;
   1066 +        else
   1067 +            return (t - x) / scale; // linear ramp from 1 at r down to 0 at t
   1068 +    }
   1069 +}
   1070 +
   1071 +static float stbir__support_trapezoid(float scale) // support of the trapezoid kernel: the distance at which its weight reaches 0
   1072 +{
   1073 +    STBIR_ASSERT(scale <= 1);
   1074 +    return 0.5f + scale / 2; // same value as 't' in stbir__filter_trapezoid
   1075 +}
   1076 +
   1077 +static float stbir__filter_triangle(float x, float s) // triangle kernel: 1 - |x| for |x| <= 1, otherwise 0; s is unused
   1078 +{
   1079 +    STBIR__UNUSED_PARAM(s);
   1080 +
   1081 +    x = (float)fabs(x);
   1082 +
   1083 +    if (x <= 1.0f)
   1084 +        return 1 - x;
   1085 +    else
   1086 +        return 0;
   1087 +}
   1088 +
   1089 +static float stbir__filter_cubic(float x, float s) // piecewise cubic kernel, nonzero for |x| < 2; s is unused
   1090 +{
   1091 +    STBIR__UNUSED_PARAM(s);
   1092 +
   1093 +    x = (float)fabs(x);
   1094 +
   1095 +    if (x < 1.0f)
   1096 +        return (4 + x*x*(3*x - 6))/6; // (3x^3 - 6x^2 + 4) / 6, the cubic B-spline inner piece
   1097 +    else if (x < 2.0f)
   1098 +        return (8 + x*(-12 + x*(6 - x)))/6; // (-x^3 + 6x^2 - 12x + 8) / 6, the cubic B-spline outer piece
   1099 +
   1100 +    return (0.0f);
   1101 +}
   1102 +
   1103 +static float stbir__filter_catmullrom(float x, float s) // piecewise cubic kernel, nonzero for |x| < 2; s is unused
   1104 +{
   1105 +    STBIR__UNUSED_PARAM(s);
   1106 +
   1107 +    x = (float)fabs(x);
   1108 +
   1109 +    if (x < 1.0f)
   1110 +        return 1 - x*x*(2.5f - 1.5f*x); // 1.5x^3 - 2.5x^2 + 1
   1111 +    else if (x < 2.0f)
   1112 +        return 2 - x*(4 + x*(0.5f*x - 2.5f)); // -0.5x^3 + 2.5x^2 - 4x + 2
   1113 +
   1114 +    return (0.0f);
   1115 +}
   1116 +
   1117 +static float stbir__filter_mitchell(float x, float s) // piecewise cubic kernel, nonzero for |x| < 2; s is unused
   1118 +{
   1119 +    STBIR__UNUSED_PARAM(s);
   1120 +
   1121 +    x = (float)fabs(x);
   1122 +
   1123 +    if (x < 1.0f)
   1124 +        return (16 + x*x*(21 * x - 36))/18; // (21x^3 - 36x^2 + 16) / 18
   1125 +    else if (x < 2.0f)
   1126 +        return (32 + x*(-60 + x*(36 - 7*x)))/18; // (-7x^3 + 36x^2 - 60x + 32) / 18
   1127 +
   1128 +    return (0.0f);
   1129 +}
   1130 +
   1131 +static float stbir__support_zero(float s) // constant support 0; paired with the NULL-kernel entry of the filter table
   1132 +{
   1133 +    STBIR__UNUSED_PARAM(s);
   1134 +    return 0;
   1135 +}
   1136 +
   1137 +static float stbir__support_one(float s) // constant support 1; used by the triangle filter entry
   1138 +{
   1139 +    STBIR__UNUSED_PARAM(s);
   1140 +    return 1;
   1141 +}
   1142 +
   1143 +static float stbir__support_two(float s) // constant support 2; used by the three cubic filter entries
   1144 +{
   1145 +    STBIR__UNUSED_PARAM(s);
   1146 +    return 2;
   1147 +}
   1148 +
   1149 +static stbir__filter_info stbir__filter_info_table[] = { // indexed by stbir_filter value; presumably in that enum's order (enum not visible here -- confirm)
   1150 +        { NULL,                     stbir__support_zero }, // index 0 has no kernel; stbir__get_filter_pixel_width asserts filter != 0
   1151 +        { stbir__filter_trapezoid,  stbir__support_trapezoid },
   1152 +        { stbir__filter_triangle,   stbir__support_one },
   1153 +        { stbir__filter_cubic,      stbir__support_two },
   1154 +        { stbir__filter_catmullrom, stbir__support_two },
   1155 +        { stbir__filter_mitchell,   stbir__support_two },
   1156 +};
   1157 +
   1158 +stbir__inline static int stbir__use_upsampling(float ratio) // nonzero when ratio > 1; a ratio of exactly 1 counts as downsampling
   1159 +{
   1160 +    return ratio > 1;
   1161 +}
   1162 +
   1163 +stbir__inline static int stbir__use_width_upsampling(stbir__info* stbir_info) // upsampling test applied to the horizontal scale
   1164 +{
   1165 +    return stbir__use_upsampling(stbir_info->horizontal_scale);
   1166 +}
   1167 +
   1168 +stbir__inline static int stbir__use_height_upsampling(stbir__info* stbir_info) // upsampling test applied to the vertical scale
   1169 +{
   1170 +    return stbir__use_upsampling(stbir_info->vertical_scale);
   1171 +}
   1172 +
   1173 +// This is the maximum number of input samples that can affect an output sample
   1174 +// with the given filter
   1175 +static int stbir__get_filter_pixel_width(stbir_filter filter, float scale)
   1176 +{
   1177 +    STBIR_ASSERT(filter != 0); // entry 0 of the filter table has a NULL kernel
   1178 +    STBIR_ASSERT(filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
   1179 +
   1180 +    if (stbir__use_upsampling(scale))
   1181 +        return (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2); // upsampling: twice the support, rounded up
   1182 +    else
   1183 +        return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2 / scale); // downsampling: additionally divided by scale (<= 1), so the width grows
   1184 +}
   1185 +
   1186 +// This is how much to expand buffers to account for filters seeking outside
   1187 +// the image boundaries.
   1188 +static int stbir__get_filter_pixel_margin(stbir_filter filter, float scale)
   1189 +{
   1190 +    return stbir__get_filter_pixel_width(filter, scale) / 2; // half the filter pixel width, using integer division
   1191 +}
   1192 +
   1193 +static int stbir__get_coefficient_width(stbir_filter filter, float scale) // coefficient slots per contributor: twice the filter support, rounded up
   1194 +{
   1195 +    if (stbir__use_upsampling(scale))
   1196 +        return (int)ceil(stbir__filter_info_table[filter].support(1 / scale) * 2);
   1197 +    else
   1198 +        return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2); // unlike the pixel width, not divided by scale
   1199 +}
   1200 +
   1201 +static int stbir__get_contributors(float scale, stbir_filter filter, int input_size, int output_size) // number of contributor records needed along one axis
   1202 +{
   1203 +    if (stbir__use_upsampling(scale))
   1204 +        return output_size; // upsampling: one record per output pixel
   1205 +    else
   1206 +        return (input_size + stbir__get_filter_pixel_margin(filter, scale) * 2); // downsampling: one per input pixel plus a filter margin on each side
   1207 +}
   1208 +
   1209 +static int stbir__get_total_horizontal_coefficients(stbir__info* info) // horizontal contributor count times coefficient width
   1210 +{
   1211 +    return info->horizontal_num_contributors
   1212 +         * stbir__get_coefficient_width      (info->horizontal_filter, info->horizontal_scale);
   1213 +}
   1214 +
   1215 +static int stbir__get_total_vertical_coefficients(stbir__info* info) // vertical contributor count times coefficient width
   1216 +{
   1217 +    return info->vertical_num_contributors
   1218 +         * stbir__get_coefficient_width      (info->vertical_filter, info->vertical_scale);
   1219 +}
   1220 +
   1221 +static stbir__contributors* stbir__get_contributor(stbir__contributors* contributors, int n) // address of the n-th contributor record; no bounds check
   1222 +{
   1223 +    return &contributors[n];
   1224 +}
   1225 +
   1226 +// For perf reasons this code is duplicated in stbir__resample_horizontal_upsample/downsample,
   1227 +// if you change it here change it there too.
   1228 +static float* stbir__get_coefficient(float* coefficients, stbir_filter filter, float scale, int n, int c)
   1229 +{
   1230 +    int width = stbir__get_coefficient_width(filter, scale); // coefficients are laid out as 'width' consecutive floats per contributor
   1231 +    return &coefficients[width*n + c]; // c-th coefficient of contributor n; no bounds check
   1232 +}
   1233 +
   1234 +static int stbir__edge_wrap_slow(stbir_edge edge, int n, int max) // maps coordinate n to an index in [0, max) according to the edge mode; expects max >= 1
   1235 +{
   1236 +    switch (edge)
   1237 +    {
   1238 +    case STBIR_EDGE_ZERO:
   1239 +        return 0; // we'll decode the wrong pixel here, and then overwrite with 0s later
   1240 +
   1241 +    case STBIR_EDGE_CLAMP:
   1242 +        if (n < 0)
   1243 +            return 0;
   1244 +
   1245 +        if (n >= max)
   1246 +            return max - 1;
   1247 +
   1248 +        return n; // NOTREACHED via stbir__edge_wrap, which returns in-range n itself
   1249 +
   1250 +    case STBIR_EDGE_REFLECT:
   1251 +    {
   1252 +        if (n < 0)
   1253 +        {
   1254 +            if (n > -max) // was 'n < max', which is always true here and let -n run past max - 1
   1255 +                return -n;
   1256 +            else
   1257 +                return max - 1; // more than one image width to the left: pin to the last pixel
   1258 +        }
   1259 +
   1260 +        if (n >= max)
   1261 +        {
   1262 +            int max2 = max * 2;
   1263 +            if (n >= max2)
   1264 +                return 0; // more than one image width to the right: pin to the first pixel
   1265 +            else
   1266 +                return max2 - n - 1;
   1267 +        }
   1268 +
   1269 +        return n; // NOTREACHED via stbir__edge_wrap, which returns in-range n itself
   1270 +    }
   1271 +
   1272 +    case STBIR_EDGE_WRAP:
   1273 +        if (n >= 0)
   1274 +            return (n % max);
   1275 +        else
   1276 +        {
   1277 +            int m = (-n) % max; // distance past a multiple of max, measured leftwards
   1278 +
   1279 +            if (m != 0)
   1280 +                m = max - m; // convert to the equivalent non-negative index
   1281 +
   1282 +            return (m);
   1283 +        }
   1284 +        // NOTREACHED
   1285 +
   1286 +    default:
   1287 +        STBIR_ASSERT(!"Unimplemented edge type");
   1288 +        return 0;
   1289 +    }
   1290 +}
   1291 +
   1292 +stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max) // fast path: in-range n is returned unchanged, everything else goes to stbir__edge_wrap_slow
   1293 +{
   1294 +    // avoid per-pixel switch
   1295 +    if (n >= 0 && n < max)
   1296 +        return n;
   1297 +    return stbir__edge_wrap_slow(edge, n, max);
   1298 +}
   1299 +
   1300 +// What input pixels contribute to this output pixel? Results are written through the three out-pointers.
   1301 +static void stbir__calculate_sample_range_upsample(int n, float out_filter_radius, float scale_ratio, float out_shift, int* in_first_pixel, int* in_last_pixel, float* in_center_of_out)
   1302 +{
   1303 +    float out_pixel_center = (float)n + 0.5f; // pixel n spans [n, n+1), so its center is n + 0.5
   1304 +    float out_pixel_influence_lowerbound = out_pixel_center - out_filter_radius;
   1305 +    float out_pixel_influence_upperbound = out_pixel_center + out_filter_radius;
   1306 +
   1307 +    float in_pixel_influence_lowerbound = (out_pixel_influence_lowerbound + out_shift) / scale_ratio; // output space -> input space
   1308 +    float in_pixel_influence_upperbound = (out_pixel_influence_upperbound + out_shift) / scale_ratio;
   1309 +
   1310 +    *in_center_of_out = (out_pixel_center + out_shift) / scale_ratio; // output pixel center expressed in input coordinates
   1311 +    *in_first_pixel = (int)(floor(in_pixel_influence_lowerbound + 0.5)); // first input pixel whose center is at or above the lower bound
   1312 +    *in_last_pixel = (int)(floor(in_pixel_influence_upperbound - 0.5)); // last input pixel whose center is at or below the upper bound
   1313 +}
   1314 +
   1315 +// What output pixels does this input pixel contribute to? Results are written through the three out-pointers.
   1316 +static void stbir__calculate_sample_range_downsample(int n, float in_pixels_radius, float scale_ratio, float out_shift, int* out_first_pixel, int* out_last_pixel, float* out_center_of_in)
   1317 +{
   1318 +    float in_pixel_center = (float)n + 0.5f; // pixel n spans [n, n+1), so its center is n + 0.5
   1319 +    float in_pixel_influence_lowerbound = in_pixel_center - in_pixels_radius;
   1320 +    float in_pixel_influence_upperbound = in_pixel_center + in_pixels_radius;
   1321 +
   1322 +    float out_pixel_influence_lowerbound = in_pixel_influence_lowerbound * scale_ratio - out_shift; // input space -> output space
   1323 +    float out_pixel_influence_upperbound = in_pixel_influence_upperbound * scale_ratio - out_shift;
   1324 +
   1325 +    *out_center_of_in = in_pixel_center * scale_ratio - out_shift; // input pixel center expressed in output coordinates
   1326 +    *out_first_pixel = (int)(floor(out_pixel_influence_lowerbound + 0.5)); // first output pixel whose center is at or above the lower bound
   1327 +    *out_last_pixel = (int)(floor(out_pixel_influence_upperbound - 0.5)); // last output pixel whose center is at or below the upper bound
   1328 +}
   1329 +
   1330 +static void stbir__calculate_coefficients_upsample(stbir_filter filter, float scale, int in_first_pixel, int in_last_pixel, float in_center_of_out, stbir__contributors* contributor, float* coefficient_group) // fills coefficient_group with normalized kernel weights for one output pixel and records the contributing input range in *contributor
   1331 +{
   1332 +    int i;
   1333 +    float total_filter = 0;
   1334 +    float filter_scale;
   1335 +
   1336 +    STBIR_ASSERT(in_last_pixel - in_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
   1337 +
   1338 +    contributor->n0 = in_first_pixel;
   1339 +    contributor->n1 = in_last_pixel;
   1340 +
   1341 +    STBIR_ASSERT(contributor->n1 >= contributor->n0);
   1342 +
   1343 +    for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
   1344 +    {
   1345 +        float in_pixel_center = (float)(i + in_first_pixel) + 0.5f;
   1346 +        coefficient_group[i] = stbir__filter_info_table[filter].kernel(in_center_of_out - in_pixel_center, 1 / scale);
   1347 +
   1348 +        // If the coefficient is zero, skip it. (Don't do the <0 check here, we want the influence of those outside pixels.)
   1349 +        if (i == 0 && !coefficient_group[i])
   1350 +        {
   1351 +            contributor->n0 = ++in_first_pixel; // drop the leading zero-weight pixel from the range
   1352 +            i--; // stay on slot 0: 'continue' runs i++, so the next pixel is written to slot 0 again
   1353 +            continue;
   1354 +        }
   1355 +
   1356 +        total_filter += coefficient_group[i];
   1357 +    }
   1358 +
   1359 +    STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(in_last_pixel + 1) + 0.5f - in_center_of_out, 1/scale) == 0); // the pixel just past the range must carry no weight
   1360 +
   1361 +    STBIR_ASSERT(total_filter > 0.9);
   1362 +    STBIR_ASSERT(total_filter < 1.1f); // Make sure it's not way off.
   1363 +
   1364 +    // Make sure the sum of all coefficients is 1.
   1365 +    filter_scale = 1 / total_filter;
   1366 +
   1367 +    for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
   1368 +        coefficient_group[i] *= filter_scale;
   1369 +
   1370 +    for (i = in_last_pixel - in_first_pixel; i >= 0; i--) // walk back from the end, trimming trailing zero coefficients off the range
   1371 +    {
   1372 +        if (coefficient_group[i])
   1373 +            break;
   1374 +
   1375 +        // This line has no weight. We can skip it.
   1376 +        contributor->n1 = contributor->n0 + i - 1;
   1377 +    }
   1378 +}
   1379 +
   1380 +static void stbir__calculate_coefficients_downsample(stbir_filter filter, float scale_ratio, int out_first_pixel, int out_last_pixel, float out_center_of_in, stbir__contributors* contributor, float* coefficient_group)
   1381 +{ // Fills coefficient_group with kernel weights (each multiplied by scale_ratio) for output pixels out_first_pixel..out_last_pixel and stores the range in contributor->n0/n1; the weights are not rescaled to sum to 1 here.
   1382 +    int i;
   1383 +
   1384 +     STBIR_ASSERT(out_last_pixel - out_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(scale_ratio) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
   1385 +
   1386 +    contributor->n0 = out_first_pixel;
   1387 +    contributor->n1 = out_last_pixel;
   1388 +
   1389 +    STBIR_ASSERT(contributor->n1 >= contributor->n0);
   1390 +
   1391 +    for (i = 0; i <= out_last_pixel - out_first_pixel; i++)
   1392 +    {
   1393 +        float out_pixel_center = (float)(i + out_first_pixel) + 0.5f;
   1394 +        float x = out_pixel_center - out_center_of_in; // signed offset of this output pixel's center from out_center_of_in
   1395 +        coefficient_group[i] = stbir__filter_info_table[filter].kernel(x, scale_ratio) * scale_ratio;
   1396 +    }
   1397 +
   1398 +    STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(out_last_pixel + 1) + 0.5f - out_center_of_in, scale_ratio) == 0);
   1399 +
   1400 +    for (i = out_last_pixel - out_first_pixel; i >= 0; i--) // walk backwards to trim trailing zero weights
   1401 +    {
   1402 +        if (coefficient_group[i])
   1403 +            break;
   1404 +
   1405 +        // This line has no weight. We can skip it.
   1406 +        contributor->n1 = contributor->n0 + i - 1;
   1407 +    }
   1408 +}
   1409 +
   1410 +static void stbir__normalize_downsample_coefficients(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, int input_size, int output_size)
   1411 +{ // Rescales the downsample coefficients so the weights feeding each output pixel sum to 1, then moves each contributor's non-zero coefficients to the front of its group and limits n0 to >= 0 and n1 to <= output_size - 1.
   1412 +    int num_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
   1413 +    int num_coefficients = stbir__get_coefficient_width(filter, scale_ratio);
   1414 +    int i, j;
   1415 +    int skip;
   1416 +
   1417 +    for (i = 0; i < output_size; i++) // i is an output pixel index in this loop
   1418 +    {
   1419 +        float scale;
   1420 +        float total = 0;
   1421 +
   1422 +        for (j = 0; j < num_contributors; j++) // first pass: sum every weight that lands on output pixel i
   1423 +        {
   1424 +            if (i >= contributors[j].n0 && i <= contributors[j].n1)
   1425 +            {
   1426 +                float coefficient = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0);
   1427 +                total += coefficient;
   1428 +            }
   1429 +            else if (i < contributors[j].n0)
   1430 +                break; // stops at the first contributor whose range starts after i; presumably n0 is non-decreasing in j (not checked here)
   1431 +        }
   1432 +
   1433 +        STBIR_ASSERT(total > 0.9f);
   1434 +        STBIR_ASSERT(total < 1.1f);
   1435 +
   1436 +        scale = 1 / total;
   1437 +
   1438 +        for (j = 0; j < num_contributors; j++) // second pass: rescale those same weights by 1 / total
   1439 +        {
   1440 +            if (i >= contributors[j].n0 && i <= contributors[j].n1)
   1441 +                *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0) *= scale;
   1442 +            else if (i < contributors[j].n0)
   1443 +                break;
   1444 +        }
   1445 +    }
   1446 +
   1447 +    // Optimize: Skip zero coefficients and contributions outside of image bounds.
   1448 +    // Do this after normalizing because normalization depends on the n0/n1 values.
   1449 +    for (j = 0; j < num_contributors; j++)
   1450 +    {
   1451 +        int range, max, width;
   1452 +
   1453 +        skip = 0;
   1454 +        while (*stbir__get_coefficient(coefficients, filter, scale_ratio, j, skip) == 0) // counts leading zero coefficients; NOTE(review): skip has no upper bound here, so this relies on a non-zero coefficient being found
   1455 +            skip++;
   1456 +
   1457 +        contributors[j].n0 += skip;
   1458 +
   1459 +        while (contributors[j].n0 < 0) // also drop coefficients that target output pixels before index 0
   1460 +        {
   1461 +            contributors[j].n0++;
   1462 +            skip++;
   1463 +        }
   1464 +
   1465 +        range = contributors[j].n1 - contributors[j].n0 + 1;
   1466 +        max = stbir__min(num_coefficients, range);
   1467 +
   1468 +        width = stbir__get_coefficient_width(filter, scale_ratio); // same call and arguments as num_coefficients above
   1469 +        for (i = 0; i < max; i++) // i is a coefficient slot in this loop, not an output pixel
   1470 +        {
   1471 +            if (i + skip >= width)
   1472 +                break;
   1473 +
   1474 +            *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i) = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i + skip); // shift the kept coefficients to the front of the group
   1475 +        }
   1476 +
   1477 +        continue; // redundant: already the last statement of the loop body
   1478 +    }
   1479 +
   1480 +    // Using min to avoid writing into invalid pixels.
   1481 +    for (i = 0; i < num_contributors; i++)
   1482 +        contributors[i].n1 = stbir__min(contributors[i].n1, output_size - 1);
   1483 +}
   1484 +
   1485 +// Each scan line uses the same kernel values so we should calculate the kernel
   1486 +// values once and then we can use them for every scan line.
   1487 +static void stbir__calculate_filters(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, float shift, int input_size, int output_size)
   1488 +{ // Fills contributors[] and coefficients[] for one axis, choosing the upsample or downsample path from stbir__use_upsampling(scale_ratio).
   1489 +    int n;
   1490 +    int total_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
   1491 +
   1492 +    if (stbir__use_upsampling(scale_ratio))
   1493 +    {
   1494 +        float out_pixels_radius = stbir__filter_info_table[filter].support(1 / scale_ratio) * scale_ratio;
   1495 +
   1496 +        // Looping through out pixels
   1497 +        for (n = 0; n < total_contributors; n++)
   1498 +        {
   1499 +            float in_center_of_out; // Center of the current out pixel in the in pixel space
   1500 +            int in_first_pixel, in_last_pixel;
   1501 +
   1502 +            stbir__calculate_sample_range_upsample(n, out_pixels_radius, scale_ratio, shift, &in_first_pixel, &in_last_pixel, &in_center_of_out);
   1503 +
   1504 +            stbir__calculate_coefficients_upsample(filter, scale_ratio, in_first_pixel, in_last_pixel, in_center_of_out, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
   1505 +        }
   1506 +    }
   1507 +    else
   1508 +    {
   1509 +        float in_pixels_radius = stbir__filter_info_table[filter].support(scale_ratio) / scale_ratio;
   1510 +
   1511 +        // Looping through in pixels
   1512 +        for (n = 0; n < total_contributors; n++)
   1513 +        {
   1514 +            float out_center_of_in; // Center of the current in pixel in the out pixel space
   1515 +            int out_first_pixel, out_last_pixel;
   1516 +            int n_adjusted = n - stbir__get_filter_pixel_margin(filter, scale_ratio); // contributor index n corresponds to input pixel n minus the filter margin, so it can be negative
   1517 +
   1518 +            stbir__calculate_sample_range_downsample(n_adjusted, in_pixels_radius, scale_ratio, shift, &out_first_pixel, &out_last_pixel, &out_center_of_in);
   1519 +
   1520 +            stbir__calculate_coefficients_downsample(filter, scale_ratio, out_first_pixel, out_last_pixel, out_center_of_in, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
   1521 +        }
   1522 +
   1523 +        stbir__normalize_downsample_coefficients(contributors, coefficients, filter, scale_ratio, input_size, output_size); // downsample weights are normalized in a separate pass over all contributors
   1524 +    }
   1525 +}
   1526 +
   1527 +static float* stbir__get_decode_buffer(stbir__info* stbir_info)
   1528 +{
   1529 +    // The 0 index of the decode buffer starts after the margin. This makes
   1530 +    // it okay to use negative indexes on the decode buffer.
   1531 +    return &stbir_info->decode_buffer[stbir_info->horizontal_filter_pixel_margin * stbir_info->channels]; // skips horizontal_filter_pixel_margin pixels of `channels` entries each
   1532 +}
   1533 +
   1534 +#define STBIR__DECODE(type, colorspace) ((int)(type) * (STBIR_MAX_COLORSPACES) + (int)(colorspace)) // combines a (type, colorspace) pair into one int; used below both as the switch value and as the case labels
   1535 +
   1536 +static void stbir__decode_scanline(stbir__info* stbir_info, int n)
   1537 +{ // Decodes input row n into the float decode buffer for x in [-margin, input_w + margin), applying edge wrapping, per-type conversion to float, optional alpha premultiplication, and zeroed margins for STBIR_EDGE_ZERO.
   1538 +    int c;
   1539 +    int channels = stbir_info->channels;
   1540 +    int alpha_channel = stbir_info->alpha_channel;
   1541 +    int type = stbir_info->type;
   1542 +    int colorspace = stbir_info->colorspace;
   1543 +    int input_w = stbir_info->input_w;
   1544 +    size_t input_stride_bytes = stbir_info->input_stride_bytes;
   1545 +    float* decode_buffer = stbir__get_decode_buffer(stbir_info);
   1546 +    stbir_edge edge_horizontal = stbir_info->edge_horizontal;
   1547 +    stbir_edge edge_vertical = stbir_info->edge_vertical;
   1548 +    size_t in_buffer_row_offset = stbir__edge_wrap(edge_vertical, n, stbir_info->input_h) * input_stride_bytes; // byte offset of the edge-wrapped source row
   1549 +    const void* input_data = (char *) stbir_info->input_data + in_buffer_row_offset;
   1550 +    int max_x = input_w + stbir_info->horizontal_filter_pixel_margin; // one past the last x that gets decoded (right margin included)
   1551 +    int decode = STBIR__DECODE(type, colorspace);
   1552 +
   1553 +    int x = -stbir_info->horizontal_filter_pixel_margin; // start inside the left margin, at a negative index
   1554 +
   1555 +    // special handling for STBIR_EDGE_ZERO because it needs to return an item that doesn't appear in the input,
   1556 +    // and we want to avoid paying overhead on every pixel if not STBIR_EDGE_ZERO
   1557 +    if (edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->input_h))
   1558 +    {
   1559 +        for (; x < max_x; x++)
   1560 +            for (c = 0; c < channels; c++)
   1561 +                decode_buffer[x*channels + c] = 0;
   1562 +        return;
   1563 +    }
   1564 +
   1565 +    switch (decode) // one case per (type, colorspace) pair; each loop continues from the x set above
   1566 +    {
   1567 +    case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
   1568 +        for (; x < max_x; x++)
   1569 +        {
   1570 +            int decode_pixel_index = x * channels;
   1571 +            int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
   1572 +            for (c = 0; c < channels; c++)
   1573 +                decode_buffer[decode_pixel_index + c] = ((float)((const unsigned char*)input_data)[input_pixel_index + c]) / stbir__max_uint8_as_float;
   1574 +        }
   1575 +        break;
   1576 +
   1577 +    case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
   1578 +        for (; x < max_x; x++)
   1579 +        {
   1580 +            int decode_pixel_index = x * channels;
   1581 +            int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
   1582 +            for (c = 0; c < channels; c++)
   1583 +                decode_buffer[decode_pixel_index + c] = stbir__srgb_uchar_to_linear_float[((const unsigned char*)input_data)[input_pixel_index + c]];
   1584 +
   1585 +            if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) // without this flag, alpha is re-decoded with the plain divide instead of the sRGB table
   1586 +                decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned char*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint8_as_float;
   1587 +        }
   1588 +        break;
   1589 +
   1590 +    case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
   1591 +        for (; x < max_x; x++)
   1592 +        {
   1593 +            int decode_pixel_index = x * channels;
   1594 +            int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
   1595 +            for (c = 0; c < channels; c++)
   1596 +                decode_buffer[decode_pixel_index + c] = ((float)((const unsigned short*)input_data)[input_pixel_index + c]) / stbir__max_uint16_as_float;
   1597 +        }
   1598 +        break;
   1599 +
   1600 +    case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
   1601 +        for (; x < max_x; x++)
   1602 +        {
   1603 +            int decode_pixel_index = x * channels;
   1604 +            int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
   1605 +            for (c = 0; c < channels; c++)
   1606 +                decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((float)((const unsigned short*)input_data)[input_pixel_index + c]) / stbir__max_uint16_as_float);
   1607 +
   1608 +            if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
   1609 +                decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned short*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint16_as_float;
   1610 +        }
   1611 +        break;
   1612 +
   1613 +    case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
   1614 +        for (; x < max_x; x++)
   1615 +        {
   1616 +            int decode_pixel_index = x * channels;
   1617 +            int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
   1618 +            for (c = 0; c < channels; c++)
   1619 +                decode_buffer[decode_pixel_index + c] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / stbir__max_uint32_as_float); // the divide is done in double, then narrowed to float
   1620 +        }
   1621 +        break;
   1622 +
   1623 +    case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
   1624 +        for (; x < max_x; x++)
   1625 +        {
   1626 +            int decode_pixel_index = x * channels;
   1627 +            int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
   1628 +            for (c = 0; c < channels; c++)
   1629 +                decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear((float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / stbir__max_uint32_as_float));
   1630 +
   1631 +            if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
   1632 +                decode_buffer[decode_pixel_index + alpha_channel] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint32_as_float);
   1633 +        }
   1634 +        break;
   1635 +
   1636 +    case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
   1637 +        for (; x < max_x; x++)
   1638 +        {
   1639 +            int decode_pixel_index = x * channels;
   1640 +            int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
   1641 +            for (c = 0; c < channels; c++)
   1642 +                decode_buffer[decode_pixel_index + c] = ((const float*)input_data)[input_pixel_index + c]; // float linear input is copied unchanged
   1643 +        }
   1644 +        break;
   1645 +
   1646 +    case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
   1647 +        for (; x < max_x; x++)
   1648 +        {
   1649 +            int decode_pixel_index = x * channels;
   1650 +            int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
   1651 +            for (c = 0; c < channels; c++)
   1652 +                decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((const float*)input_data)[input_pixel_index + c]);
   1653 +
   1654 +            if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
   1655 +                decode_buffer[decode_pixel_index + alpha_channel] = ((const float*)input_data)[input_pixel_index + alpha_channel];
   1656 +        }
   1657 +
   1658 +        break;
   1659 +
   1660 +    default:
   1661 +        STBIR_ASSERT(!"Unknown type/colorspace/channels combination.");
   1662 +        break;
   1663 +    }
   1664 +
   1665 +    if (!(stbir_info->flags & STBIR_FLAG_ALPHA_PREMULTIPLIED)) // input is not flagged premultiplied: multiply every non-alpha channel by alpha
   1666 +    {
   1667 +        for (x = -stbir_info->horizontal_filter_pixel_margin; x < max_x; x++)
   1668 +        {
   1669 +            int decode_pixel_index = x * channels;
   1670 +
   1671 +            // If the alpha value is 0 it will clobber the color values. Make sure it's not.
   1672 +            float alpha = decode_buffer[decode_pixel_index + alpha_channel];
   1673 +#ifndef STBIR_NO_ALPHA_EPSILON
   1674 +            if (stbir_info->type != STBIR_TYPE_FLOAT) {
   1675 +                alpha += STBIR_ALPHA_EPSILON;
   1676 +                decode_buffer[decode_pixel_index + alpha_channel] = alpha; // the epsilon-adjusted alpha is written back, not just used locally
   1677 +            }
   1678 +#endif
   1679 +            for (c = 0; c < channels; c++)
   1680 +            {
   1681 +                if (c == alpha_channel)
   1682 +                    continue;
   1683 +
   1684 +                decode_buffer[decode_pixel_index + c] *= alpha;
   1685 +            }
   1686 +        }
   1687 +    }
   1688 +
   1689 +    if (edge_horizontal == STBIR_EDGE_ZERO) // overwrite both horizontal margins with zeros
   1690 +    {
   1691 +        for (x = -stbir_info->horizontal_filter_pixel_margin; x < 0; x++)
   1692 +        {
   1693 +            for (c = 0; c < channels; c++)
   1694 +                decode_buffer[x*channels + c] = 0;
   1695 +        }
   1696 +        for (x = input_w; x < max_x; x++)
   1697 +        {
   1698 +            for (c = 0; c < channels; c++)
   1699 +                decode_buffer[x*channels + c] = 0;
   1700 +        }
   1701 +    }
   1702 +}
   1703 +
   1704 +static float* stbir__get_ring_buffer_entry(float* ring_buffer, int index, int ring_buffer_length)
   1705 +{
   1706 +    return &ring_buffer[index * ring_buffer_length]; // ring_buffer_length is the per-entry size counted in floats, not bytes
   1707 +}
   1708 +
   1709 +static float* stbir__add_empty_ring_buffer_entry(stbir__info* stbir_info, int n)
   1710 +{ // Records n as the last scanline held, picks the ring-buffer entry for it, zero-fills that entry and returns it.
   1711 +    int ring_buffer_index;
   1712 +    float* ring_buffer;
   1713 +
   1714 +    stbir_info->ring_buffer_last_scanline = n;
   1715 +
   1716 +    if (stbir_info->ring_buffer_begin_index < 0) // presumably a negative begin index marks an empty ring buffer; it is restarted at entry 0 with n as the first scanline
   1717 +    {
   1718 +        ring_buffer_index = stbir_info->ring_buffer_begin_index = 0;
   1719 +        stbir_info->ring_buffer_first_scanline = n;
   1720 +    }
   1721 +    else
   1722 +    {
   1723 +        ring_buffer_index = (stbir_info->ring_buffer_begin_index + (stbir_info->ring_buffer_last_scanline - stbir_info->ring_buffer_first_scanline)) % stbir_info->ring_buffer_num_entries;
   1724 +        STBIR_ASSERT(ring_buffer_index != stbir_info->ring_buffer_begin_index); // wrapping onto the begin entry would overwrite the first stored scanline
   1725 +    }
   1726 +
   1727 +    ring_buffer = stbir__get_ring_buffer_entry(stbir_info->ring_buffer, ring_buffer_index, stbir_info->ring_buffer_length_bytes / sizeof(float)); // entry length converted from bytes to a float count
   1728 +    memset(ring_buffer, 0, stbir_info->ring_buffer_length_bytes);
   1729 +
   1730 +    return ring_buffer;
   1731 +}
   1732 +
   1733 +
   1734 +static void stbir__resample_horizontal_upsample(stbir__info* stbir_info, float* output_buffer)
   1735 +{ // For each output pixel x, adds the coefficient-weighted decoded input pixels n0..n1 into output_buffer. It accumulates with +=, and both callers in this file pass a buffer that was just zeroed.
   1736 +    int x, k;
   1737 +    int output_w = stbir_info->output_w;
   1738 +    int channels = stbir_info->channels;
   1739 +    float* decode_buffer = stbir__get_decode_buffer(stbir_info);
   1740 +    stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
   1741 +    float* horizontal_coefficients = stbir_info->horizontal_coefficients;
   1742 +    int coefficient_width = stbir_info->horizontal_coefficient_width;
   1743 +
   1744 +    for (x = 0; x < output_w; x++)
   1745 +    {
   1746 +        int n0 = horizontal_contributors[x].n0;
   1747 +        int n1 = horizontal_contributors[x].n1;
   1748 +
   1749 +        int out_pixel_index = x * channels;
   1750 +        int coefficient_group = coefficient_width * x; // index of the first coefficient belonging to output pixel x
   1751 +        int coefficient_counter = 0;
   1752 +
   1753 +        STBIR_ASSERT(n1 >= n0);
   1754 +        STBIR_ASSERT(n0 >= -stbir_info->horizontal_filter_pixel_margin);
   1755 +        STBIR_ASSERT(n1 >= -stbir_info->horizontal_filter_pixel_margin);
   1756 +        STBIR_ASSERT(n0 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin);
   1757 +        STBIR_ASSERT(n1 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin);
   1758 +
   1759 +        switch (channels) { // cases 1-4 are hand-unrolled copies of the generic default loop
   1760 +            case 1:
   1761 +                for (k = n0; k <= n1; k++)
   1762 +                {
   1763 +                    int in_pixel_index = k * 1;
   1764 +                    float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
   1765 +                    STBIR_ASSERT(coefficient != 0);
   1766 +                    output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
   1767 +                }
   1768 +                break;
   1769 +            case 2:
   1770 +                for (k = n0; k <= n1; k++)
   1771 +                {
   1772 +                    int in_pixel_index = k * 2;
   1773 +                    float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
   1774 +                    STBIR_ASSERT(coefficient != 0);
   1775 +                    output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
   1776 +                    output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
   1777 +                }
   1778 +                break;
   1779 +            case 3:
   1780 +                for (k = n0; k <= n1; k++)
   1781 +                {
   1782 +                    int in_pixel_index = k * 3;
   1783 +                    float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
   1784 +                    STBIR_ASSERT(coefficient != 0);
   1785 +                    output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
   1786 +                    output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
   1787 +                    output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
   1788 +                }
   1789 +                break;
   1790 +            case 4:
   1791 +                for (k = n0; k <= n1; k++)
   1792 +                {
   1793 +                    int in_pixel_index = k * 4;
   1794 +                    float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
   1795 +                    STBIR_ASSERT(coefficient != 0);
   1796 +                    output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
   1797 +                    output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
   1798 +                    output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
   1799 +                    output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
   1800 +                }
   1801 +                break;
   1802 +            default:
   1803 +                for (k = n0; k <= n1; k++)
   1804 +                {
   1805 +                    int in_pixel_index = k * channels;
   1806 +                    float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
   1807 +                    int c;
   1808 +                    STBIR_ASSERT(coefficient != 0);
   1809 +                    for (c = 0; c < channels; c++)
   1810 +                        output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
   1811 +                }
   1812 +                break;
   1813 +        }
   1814 +    }
   1815 +}
   1816 +
   1817 +static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, float* output_buffer)
   1818 +{ // For each contributor x (an input pixel, margins included), adds its coefficient-weighted decoded value into output pixels n0..n1 of output_buffer. It accumulates with +=, and both callers in this file pass a buffer that was just zeroed.
   1819 +    int x, k;
   1820 +    int input_w = stbir_info->input_w;
   1821 +    int channels = stbir_info->channels;
   1822 +    float* decode_buffer = stbir__get_decode_buffer(stbir_info);
   1823 +    stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
   1824 +    float* horizontal_coefficients = stbir_info->horizontal_coefficients;
   1825 +    int coefficient_width = stbir_info->horizontal_coefficient_width;
   1826 +    int filter_pixel_margin = stbir_info->horizontal_filter_pixel_margin;
   1827 +    int max_x = input_w + filter_pixel_margin * 2; // loop bound: input pixels plus the margin on both sides
   1828 +
   1829 +    STBIR_ASSERT(!stbir__use_width_upsampling(stbir_info));
   1830 +
   1831 +    switch (channels) { // cases 1-4 are hand-unrolled copies of the generic default loop
   1832 +        case 1:
   1833 +            for (x = 0; x < max_x; x++)
   1834 +            {
   1835 +                int n0 = horizontal_contributors[x].n0;
   1836 +                int n1 = horizontal_contributors[x].n1;
   1837 +
   1838 +                int in_x = x - filter_pixel_margin; // contributor index to input x; negative inside the left margin
   1839 +                int in_pixel_index = in_x * 1;
   1840 +                int max_n = n1;
   1841 +                int coefficient_group = coefficient_width * x; // index of the first coefficient belonging to contributor x
   1842 +
   1843 +                for (k = n0; k <= max_n; k++)
   1844 +                {
   1845 +                    int out_pixel_index = k * 1;
   1846 +                    float coefficient = horizontal_coefficients[coefficient_group + k - n0];
   1847 +                    STBIR_ASSERT(coefficient != 0);
   1848 +                    output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
   1849 +                }
   1850 +            }
   1851 +            break;
   1852 +
   1853 +        case 2:
   1854 +            for (x = 0; x < max_x; x++)
   1855 +            {
   1856 +                int n0 = horizontal_contributors[x].n0;
   1857 +                int n1 = horizontal_contributors[x].n1;
   1858 +
   1859 +                int in_x = x - filter_pixel_margin;
   1860 +                int in_pixel_index = in_x * 2;
   1861 +                int max_n = n1;
   1862 +                int coefficient_group = coefficient_width * x;
   1863 +
   1864 +                for (k = n0; k <= max_n; k++)
   1865 +                {
   1866 +                    int out_pixel_index = k * 2;
   1867 +                    float coefficient = horizontal_coefficients[coefficient_group + k - n0];
   1868 +                    STBIR_ASSERT(coefficient != 0);
   1869 +                    output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
   1870 +                    output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
   1871 +                }
   1872 +            }
   1873 +            break;
   1874 +
   1875 +        case 3:
   1876 +            for (x = 0; x < max_x; x++)
   1877 +            {
   1878 +                int n0 = horizontal_contributors[x].n0;
   1879 +                int n1 = horizontal_contributors[x].n1;
   1880 +
   1881 +                int in_x = x - filter_pixel_margin;
   1882 +                int in_pixel_index = in_x * 3;
   1883 +                int max_n = n1;
   1884 +                int coefficient_group = coefficient_width * x;
   1885 +
   1886 +                for (k = n0; k <= max_n; k++)
   1887 +                {
   1888 +                    int out_pixel_index = k * 3;
   1889 +                    float coefficient = horizontal_coefficients[coefficient_group + k - n0];
   1890 +                    STBIR_ASSERT(coefficient != 0);
   1891 +                    output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
   1892 +                    output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
   1893 +                    output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
   1894 +                }
   1895 +            }
   1896 +            break;
   1897 +
   1898 +        case 4:
   1899 +            for (x = 0; x < max_x; x++)
   1900 +            {
   1901 +                int n0 = horizontal_contributors[x].n0;
   1902 +                int n1 = horizontal_contributors[x].n1;
   1903 +
   1904 +                int in_x = x - filter_pixel_margin;
   1905 +                int in_pixel_index = in_x * 4;
   1906 +                int max_n = n1;
   1907 +                int coefficient_group = coefficient_width * x;
   1908 +
   1909 +                for (k = n0; k <= max_n; k++)
   1910 +                {
   1911 +                    int out_pixel_index = k * 4;
   1912 +                    float coefficient = horizontal_coefficients[coefficient_group + k - n0];
   1913 +                    STBIR_ASSERT(coefficient != 0);
   1914 +                    output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
   1915 +                    output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
   1916 +                    output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
   1917 +                    output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
   1918 +                }
   1919 +            }
   1920 +            break;
   1921 +
   1922 +        default:
   1923 +            for (x = 0; x < max_x; x++)
   1924 +            {
   1925 +                int n0 = horizontal_contributors[x].n0;
   1926 +                int n1 = horizontal_contributors[x].n1;
   1927 +
   1928 +                int in_x = x - filter_pixel_margin;
   1929 +                int in_pixel_index = in_x * channels;
   1930 +                int max_n = n1;
   1931 +                int coefficient_group = coefficient_width * x;
   1932 +
   1933 +                for (k = n0; k <= max_n; k++)
   1934 +                {
   1935 +                    int c;
   1936 +                    int out_pixel_index = k * channels;
   1937 +                    float coefficient = horizontal_coefficients[coefficient_group + k - n0];
   1938 +                    STBIR_ASSERT(coefficient != 0);
   1939 +                    for (c = 0; c < channels; c++)
   1940 +                        output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
   1941 +                }
   1942 +            }
   1943 +            break;
   1944 +    }
   1945 +}
   1946 +
   1947 +static void stbir__decode_and_resample_upsample(stbir__info* stbir_info, int n)
   1948 +{ // Decodes input scanline n and horizontally resamples it straight into a newly added, zeroed ring-buffer entry.
   1949 +    // Decode the nth scanline from the source image into the decode buffer.
   1950 +    stbir__decode_scanline(stbir_info, n);
   1951 +
   1952 +    // Now resample it into the ring buffer.
   1953 +    if (stbir__use_width_upsampling(stbir_info)) // the horizontal direction is chosen on its own; it may be a downsample even in this function
   1954 +        stbir__resample_horizontal_upsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n));
   1955 +    else
   1956 +        stbir__resample_horizontal_downsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n));
   1957 +
   1958 +    // Now it's sitting in the ring buffer ready to be used as source for the vertical sampling.
   1959 +}
   1960 +
   1961 +static void stbir__decode_and_resample_downsample(stbir__info* stbir_info, int n)
   1962 +{ // Decodes input scanline n and horizontally resamples it into stbir_info->horizontal_buffer.
   1963 +    // Decode the nth scanline from the source image into the decode buffer.
   1964 +    stbir__decode_scanline(stbir_info, n);
   1965 +
   1966 +    memset(stbir_info->horizontal_buffer, 0, stbir_info->output_w * stbir_info->channels * sizeof(float)); // cleared first because the horizontal resamplers accumulate with +=
   1967 +
   1968 +    // Now resample it into the horizontal buffer.
   1969 +    if (stbir__use_width_upsampling(stbir_info))
   1970 +        stbir__resample_horizontal_upsample(stbir_info, stbir_info->horizontal_buffer);
   1971 +    else
   1972 +        stbir__resample_horizontal_downsample(stbir_info, stbir_info->horizontal_buffer);
   1973 +
   1974 +    // Now it's sitting in the horizontal buffer ready to be distributed into the ring buffers.
   1975 +}
   1976 +
   1977 +// Get the ring buffer entry for scanline get_scanline, given that entry begin_index holds first_scanline.
   1978 +static float* stbir__get_ring_buffer_scanline(int get_scanline, float* ring_buffer, int begin_index, int first_scanline, int ring_buffer_num_entries, int ring_buffer_length)
   1979 +{
   1980 +    int ring_buffer_index = (begin_index + (get_scanline - first_scanline)) % ring_buffer_num_entries; // offset from the first scanline, wrapped around the ring
   1981 +    return stbir__get_ring_buffer_entry(ring_buffer, ring_buffer_index, ring_buffer_length);
   1982 +}
   1983 +
   1984 +// Convert num_pixels float pixels from encode_buffer into output_buffer using the type/colorspace selected by decode.
   1985 +static void stbir__encode_scanline(stbir__info* stbir_info, int num_pixels, void *output_buffer, float *encode_buffer, int channels, int alpha_channel, int decode)
   1986 +{
   1987 +    int x;
   1988 +    int n;
   1989 +    int num_nonalpha;
   1990 +    stbir_uint16 nonalpha[STBIR_MAX_CHANNELS];
   1991 +
   1992 +    if (!(stbir_info->flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)) // un-premultiply encode_buffer in place
   1993 +    {
   1994 +        for (x=0; x < num_pixels; ++x)
   1995 +        {
   1996 +            int pixel_index = x*channels;
   1997 +
   1998 +            float alpha = encode_buffer[pixel_index + alpha_channel];
   1999 +            float reciprocal_alpha = alpha ? 1.0f / alpha : 0; // zero alpha zeroes the color channels instead of dividing by zero
   2000 +
   2001 +            // unrolling this produced a 1% slowdown upscaling a large RGBA linear-space image on my machine - stb
   2002 +            for (n = 0; n < channels; n++)
   2003 +                if (n != alpha_channel)
   2004 +                    encode_buffer[pixel_index + n] *= reciprocal_alpha;
   2005 +
   2006 +            // We added in a small epsilon to prevent the color channel from being deleted with zero alpha.
   2007 +            // Because we only add it for integer types, it will automatically be discarded on integer
   2008 +            // conversion, so we don't need to subtract it back out (which would be problematic for
   2009 +            // numeric precision reasons).
   2010 +        }
   2011 +    }
   2012 +
   2013 +    // build a table of all channels that need colorspace correction, so
   2014 +    // we don't perform colorspace correction on channels that don't need it.
   2015 +    for (x = 0, num_nonalpha = 0; x < channels; ++x)
   2016 +    {
   2017 +        if (x != alpha_channel || (stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE))
   2018 +        {
   2019 +            nonalpha[num_nonalpha++] = (stbir_uint16)x;
   2020 +        }
   2021 +    }
   2022 +
   2023 +    #define STBIR__ROUND_INT(f)    ((int)          ((f)+0.5))
   2024 +    #define STBIR__ROUND_UINT(f)   ((stbir_uint32) ((f)+0.5))
   2025 +
   2026 +    #ifdef STBIR__SATURATE_INT
   2027 +    #define STBIR__ENCODE_LINEAR8(f)   stbir__saturate8 (STBIR__ROUND_INT((f) * stbir__max_uint8_as_float ))
   2028 +    #define STBIR__ENCODE_LINEAR16(f)  stbir__saturate16(STBIR__ROUND_INT((f) * stbir__max_uint16_as_float))
   2029 +    #else
   2030 +    #define STBIR__ENCODE_LINEAR8(f)   (unsigned char ) STBIR__ROUND_INT(stbir__saturate(f) * stbir__max_uint8_as_float )
   2031 +    #define STBIR__ENCODE_LINEAR16(f)  (unsigned short) STBIR__ROUND_INT(stbir__saturate(f) * stbir__max_uint16_as_float)
   2032 +    #endif
   2033 +
   2034 +    switch (decode) // LINEAR cases encode every channel; SRGB cases encode the nonalpha table, then alpha linearly
   2035 +    {
   2036 +        case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
   2037 +            for (x=0; x < num_pixels; ++x)
   2038 +            {
   2039 +                int pixel_index = x*channels;
   2040 +
   2041 +                for (n = 0; n < channels; n++)
   2042 +                {
   2043 +                    int index = pixel_index + n;
   2044 +                    ((unsigned char*)output_buffer)[index] = STBIR__ENCODE_LINEAR8(encode_buffer[index]);
   2045 +                }
   2046 +            }
   2047 +            break;
   2048 +
   2049 +        case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
   2050 +            for (x=0; x < num_pixels; ++x)
   2051 +            {
   2052 +                int pixel_index = x*channels;
   2053 +
   2054 +                for (n = 0; n < num_nonalpha; n++)
   2055 +                {
   2056 +                    int index = pixel_index + nonalpha[n];
   2057 +                    ((unsigned char*)output_buffer)[index] = stbir__linear_to_srgb_uchar(encode_buffer[index]);
   2058 +                }
   2059 +
   2060 +                if (!(stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE))
   2061 +                    ((unsigned char *)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR8(encode_buffer[pixel_index+alpha_channel]);
   2062 +            }
   2063 +            break;
   2064 +
   2065 +        case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
   2066 +            for (x=0; x < num_pixels; ++x)
   2067 +            {
   2068 +                int pixel_index = x*channels;
   2069 +
   2070 +                for (n = 0; n < channels; n++)
   2071 +                {
   2072 +                    int index = pixel_index + n;
   2073 +                    ((unsigned short*)output_buffer)[index] = STBIR__ENCODE_LINEAR16(encode_buffer[index]);
   2074 +                }
   2075 +            }
   2076 +            break;
   2077 +
   2078 +        case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
   2079 +            for (x=0; x < num_pixels; ++x)
   2080 +            {
   2081 +                int pixel_index = x*channels;
   2082 +
   2083 +                for (n = 0; n < num_nonalpha; n++)
   2084 +                {
   2085 +                    int index = pixel_index + nonalpha[n];
   2086 +                    ((unsigned short*)output_buffer)[index] = (unsigned short)STBIR__ROUND_INT(stbir__linear_to_srgb(stbir__saturate(encode_buffer[index])) * stbir__max_uint16_as_float);
   2087 +                }
   2088 +
   2089 +                if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
   2090 +                    ((unsigned short*)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR16(encode_buffer[pixel_index + alpha_channel]);
   2091 +            }
   2092 +
   2093 +            break;
   2094 +
   2095 +        case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
   2096 +            for (x=0; x < num_pixels; ++x)
   2097 +            {
   2098 +                int pixel_index = x*channels;
   2099 +
   2100 +                for (n = 0; n < channels; n++)
   2101 +                {
   2102 +                    int index = pixel_index + n;
   2103 +                    ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__saturate(encode_buffer[index])) * stbir__max_uint32_as_float);
   2104 +                }
   2105 +            }
   2106 +            break;
   2107 +
   2108 +        case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
   2109 +            for (x=0; x < num_pixels; ++x)
   2110 +            {
   2111 +                int pixel_index = x*channels;
   2112 +
   2113 +                for (n = 0; n < num_nonalpha; n++)
   2114 +                {
   2115 +                    int index = pixel_index + nonalpha[n];
   2116 +                    ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__linear_to_srgb(stbir__saturate(encode_buffer[index]))) * stbir__max_uint32_as_float);
   2117 +                }
   2118 +
   2119 +                if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) // round as unsigned like the color channels: the scaled alpha can exceed INT_MAX
   2120 +                    ((unsigned int*)output_buffer)[pixel_index + alpha_channel] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__saturate(encode_buffer[pixel_index + alpha_channel])) * stbir__max_uint32_as_float);
   2121 +            }
   2122 +            break;
   2123 +
   2124 +        case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
   2125 +            for (x=0; x < num_pixels; ++x)
   2126 +            {
   2127 +                int pixel_index = x*channels;
   2128 +
   2129 +                for (n = 0; n < channels; n++)
   2130 +                {
   2131 +                    int index = pixel_index + n;
   2132 +                    ((float*)output_buffer)[index] = encode_buffer[index];
   2133 +                }
   2134 +            }
   2135 +            break;
   2136 +
   2137 +        case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
   2138 +            for (x=0; x < num_pixels; ++x)
   2139 +            {
   2140 +                int pixel_index = x*channels;
   2141 +
   2142 +                for (n = 0; n < num_nonalpha; n++)
   2143 +                {
   2144 +                    int index = pixel_index + nonalpha[n];
   2145 +                    ((float*)output_buffer)[index] = stbir__linear_to_srgb(encode_buffer[index]);
   2146 +                }
   2147 +
   2148 +                if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
   2149 +                    ((float*)output_buffer)[pixel_index + alpha_channel] = encode_buffer[pixel_index + alpha_channel];
   2150 +            }
   2151 +            break;
   2152 +
   2153 +        default:
   2154 +            STBIR_ASSERT(!"Unknown type/colorspace/channels combination.");
   2155 +            break;
   2156 +    }
   2157 +}
   2158 +// Produce output row n: sum ring buffer scanlines n0..n1 weighted by their vertical coefficients, then encode the row.
   2159 +static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n)
   2160 +{
   2161 +    int x, k;
   2162 +    int output_w = stbir_info->output_w;
   2163 +    stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
   2164 +    float* vertical_coefficients = stbir_info->vertical_coefficients;
   2165 +    int channels = stbir_info->channels;
   2166 +    int alpha_channel = stbir_info->alpha_channel;
   2167 +    int type = stbir_info->type;
   2168 +    int colorspace = stbir_info->colorspace;
   2169 +    int ring_buffer_entries = stbir_info->ring_buffer_num_entries;
   2170 +    void* output_data = stbir_info->output_data;
   2171 +    float* encode_buffer = stbir_info->encode_buffer;
   2172 +    int decode = STBIR__DECODE(type, colorspace);
   2173 +    int coefficient_width = stbir_info->vertical_coefficient_width;
   2174 +    int coefficient_counter;
   2175 +    int contributor = n;
   2176 +
   2177 +    float* ring_buffer = stbir_info->ring_buffer;
   2178 +    int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
   2179 +    int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
   2180 +    int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
   2181 +
   2182 +    int n0,n1, output_row_start;
   2183 +    int coefficient_group = coefficient_width * contributor; // index of this row's first coefficient
   2184 +
   2185 +    n0 = vertical_contributors[contributor].n0;
   2186 +    n1 = vertical_contributors[contributor].n1;
   2187 +
   2188 +    output_row_start = n * stbir_info->output_stride_bytes; // byte offset of row n in output_data
   2189 +
   2190 +    STBIR_ASSERT(stbir__use_height_upsampling(stbir_info));
   2191 +
   2192 +    memset(encode_buffer, 0, output_w * sizeof(float) * channels); // the loops below accumulate with +=
   2193 +
   2194 +    // I tried reblocking this for better cache usage of encode_buffer
   2195 +    // (using x_outer, k, x_inner), but it lost speed. -- stb
   2196 +
   2197 +    coefficient_counter = 0;
   2198 +    switch (channels) { // cases 1-4 are unrolled versions of the generic default loop
   2199 +        case 1:
   2200 +            for (k = n0; k <= n1; k++)
   2201 +            {
   2202 +                int coefficient_index = coefficient_counter++;
   2203 +                float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
   2204 +                float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
   2205 +                for (x = 0; x < output_w; ++x)
   2206 +                {
   2207 +                    int in_pixel_index = x * 1;
   2208 +                    encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
   2209 +                }
   2210 +            }
   2211 +            break;
   2212 +        case 2:
   2213 +            for (k = n0; k <= n1; k++)
   2214 +            {
   2215 +                int coefficient_index = coefficient_counter++;
   2216 +                float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
   2217 +                float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
   2218 +                for (x = 0; x < output_w; ++x)
   2219 +                {
   2220 +                    int in_pixel_index = x * 2;
   2221 +                    encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
   2222 +                    encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
   2223 +                }
   2224 +            }
   2225 +            break;
   2226 +        case 3:
   2227 +            for (k = n0; k <= n1; k++)
   2228 +            {
   2229 +                int coefficient_index = coefficient_counter++;
   2230 +                float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
   2231 +                float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
   2232 +                for (x = 0; x < output_w; ++x)
   2233 +                {
   2234 +                    int in_pixel_index = x * 3;
   2235 +                    encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
   2236 +                    encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
   2237 +                    encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
   2238 +                }
   2239 +            }
   2240 +            break;
   2241 +        case 4:
   2242 +            for (k = n0; k <= n1; k++)
   2243 +            {
   2244 +                int coefficient_index = coefficient_counter++;
   2245 +                float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
   2246 +                float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
   2247 +                for (x = 0; x < output_w; ++x)
   2248 +                {
   2249 +                    int in_pixel_index = x * 4;
   2250 +                    encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
   2251 +                    encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
   2252 +                    encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
   2253 +                    encode_buffer[in_pixel_index + 3] += ring_buffer_entry[in_pixel_index + 3] * coefficient;
   2254 +                }
   2255 +            }
   2256 +            break;
   2257 +        default:
   2258 +            for (k = n0; k <= n1; k++)
   2259 +            {
   2260 +                int coefficient_index = coefficient_counter++;
   2261 +                float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
   2262 +                float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
   2263 +                for (x = 0; x < output_w; ++x)
   2264 +                {
   2265 +                    int in_pixel_index = x * channels;
   2266 +                    int c;
   2267 +                    for (c = 0; c < channels; c++)
   2268 +                        encode_buffer[in_pixel_index + c] += ring_buffer_entry[in_pixel_index + c] * coefficient;
   2269 +                }
   2270 +            }
   2271 +            break;
   2272 +    }
   2273 +    stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, encode_buffer, channels, alpha_channel, decode);
   2274 +}
   2275 +// Add the horizontal buffer (input scanline n) into ring buffer scanlines n0..n1, each weighted by its vertical coefficient.
   2276 +static void stbir__resample_vertical_downsample(stbir__info* stbir_info, int n)
   2277 +{
   2278 +    int x, k;
   2279 +    int output_w = stbir_info->output_w;
   2280 +    stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
   2281 +    float* vertical_coefficients = stbir_info->vertical_coefficients;
   2282 +    int channels = stbir_info->channels;
   2283 +    int ring_buffer_entries = stbir_info->ring_buffer_num_entries;
   2284 +    float* horizontal_buffer = stbir_info->horizontal_buffer;
   2285 +    int coefficient_width = stbir_info->vertical_coefficient_width;
   2286 +    int contributor = n + stbir_info->vertical_filter_pixel_margin; // the caller's n starts at -margin, so this index starts at 0
   2287 +
   2288 +    float* ring_buffer = stbir_info->ring_buffer;
   2289 +    int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
   2290 +    int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
   2291 +    int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
   2292 +    int n0,n1;
   2293 +
   2294 +    n0 = vertical_contributors[contributor].n0;
   2295 +    n1 = vertical_contributors[contributor].n1;
   2296 +
   2297 +    STBIR_ASSERT(!stbir__use_height_upsampling(stbir_info));
   2298 +
   2299 +    for (k = n0; k <= n1; k++)
   2300 +    {
   2301 +        int coefficient_index = k - n0;
   2302 +        int coefficient_group = coefficient_width * contributor;
   2303 +        float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
   2304 +
   2305 +        float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
   2306 +
   2307 +        switch (channels) { // cases 1-4 are unrolled versions of the generic default loop
   2308 +            case 1:
   2309 +                for (x = 0; x < output_w; x++)
   2310 +                {
   2311 +                    int in_pixel_index = x * 1;
   2312 +                    ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
   2313 +                }
   2314 +                break;
   2315 +            case 2:
   2316 +                for (x = 0; x < output_w; x++)
   2317 +                {
   2318 +                    int in_pixel_index = x * 2;
   2319 +                    ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
   2320 +                    ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
   2321 +                }
   2322 +                break;
   2323 +            case 3:
   2324 +                for (x = 0; x < output_w; x++)
   2325 +                {
   2326 +                    int in_pixel_index = x * 3;
   2327 +                    ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
   2328 +                    ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
   2329 +                    ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
   2330 +                }
   2331 +                break;
   2332 +            case 4:
   2333 +                for (x = 0; x < output_w; x++)
   2334 +                {
   2335 +                    int in_pixel_index = x * 4;
   2336 +                    ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
   2337 +                    ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
   2338 +                    ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
   2339 +                    ring_buffer_entry[in_pixel_index + 3] += horizontal_buffer[in_pixel_index + 3] * coefficient;
   2340 +                }
   2341 +                break;
   2342 +            default:
   2343 +                for (x = 0; x < output_w; x++)
   2344 +                {
   2345 +                    int in_pixel_index = x * channels;
   2346 +
   2347 +                    int c;
   2348 +                    for (c = 0; c < channels; c++)
   2349 +                        ring_buffer_entry[in_pixel_index + c] += horizontal_buffer[in_pixel_index + c] * coefficient;
   2350 +                }
   2351 +                break;
   2352 +        }
   2353 +    }
   2354 +}
   2355 +// Height-upsampling driver: for each output row, slide the ring buffer over the input scanlines it needs, then resample the row.
   2356 +static void stbir__buffer_loop_upsample(stbir__info* stbir_info)
   2357 +{
   2358 +    int y;
   2359 +    float scale_ratio = stbir_info->vertical_scale;
   2360 +    float out_scanlines_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(1/scale_ratio) * scale_ratio;
   2361 +
   2362 +    STBIR_ASSERT(stbir__use_height_upsampling(stbir_info));
   2363 +
   2364 +    for (y = 0; y < stbir_info->output_h; y++)
   2365 +    {
   2366 +        float in_center_of_out = 0; // Center of the current out scanline in the in scanline space
   2367 +        int in_first_scanline = 0, in_last_scanline = 0;
   2368 +
   2369 +        stbir__calculate_sample_range_upsample(y, out_scanlines_radius, scale_ratio, stbir_info->vertical_shift, &in_first_scanline, &in_last_scanline, &in_center_of_out);
   2370 +
   2371 +        STBIR_ASSERT(in_last_scanline - in_first_scanline + 1 <= stbir_info->ring_buffer_num_entries);
   2372 +
   2373 +        if (stbir_info->ring_buffer_begin_index >= 0) // a negative begin index marks an empty ring buffer
   2374 +        {
   2375 +            // Get rid of whatever we don't need anymore.
   2376 +            while (in_first_scanline > stbir_info->ring_buffer_first_scanline)
   2377 +            {
   2378 +                if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline)
   2379 +                {
   2380 +                    // We just popped the last scanline off the ring buffer.
   2381 +                    // Reset it to the empty state.
   2382 +                    stbir_info->ring_buffer_begin_index = -1;
   2383 +                    stbir_info->ring_buffer_first_scanline = 0;
   2384 +                    stbir_info->ring_buffer_last_scanline = 0;
   2385 +                    break;
   2386 +                }
   2387 +                else
   2388 +                {
   2389 +                    stbir_info->ring_buffer_first_scanline++;
   2390 +                    stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries;
   2391 +                }
   2392 +            }
   2393 +        }
   2394 +
   2395 +        // Load in new ones: seed an empty buffer with the first needed scanline, then extend up to the last.
   2396 +        if (stbir_info->ring_buffer_begin_index < 0)
   2397 +            stbir__decode_and_resample_upsample(stbir_info, in_first_scanline);
   2398 +
   2399 +        while (in_last_scanline > stbir_info->ring_buffer_last_scanline)
   2400 +            stbir__decode_and_resample_upsample(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
   2401 +
   2402 +        // Now all buffers should be ready to write a row of vertical sampling.
   2403 +        stbir__resample_vertical_upsample(stbir_info, y);
   2404 +
   2405 +        STBIR_PROGRESS_REPORT((float)y / stbir_info->output_h);
   2406 +    }
   2407 +}
   2408 +// Pop ring buffer scanlines that precede first_necessary_scanline, encoding each one that lies inside the output image.
   2409 +static void stbir__empty_ring_buffer(stbir__info* stbir_info, int first_necessary_scanline)
   2410 +{
   2411 +    int output_stride_bytes = stbir_info->output_stride_bytes;
   2412 +    int channels = stbir_info->channels;
   2413 +    int alpha_channel = stbir_info->alpha_channel;
   2414 +    int type = stbir_info->type;
   2415 +    int colorspace = stbir_info->colorspace;
   2416 +    int output_w = stbir_info->output_w;
   2417 +    void* output_data = stbir_info->output_data;
   2418 +    int decode = STBIR__DECODE(type, colorspace);
   2419 +
   2420 +    float* ring_buffer = stbir_info->ring_buffer;
   2421 +    int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
   2422 +
   2423 +    if (stbir_info->ring_buffer_begin_index >= 0) // a negative begin index marks an empty ring buffer
   2424 +    {
   2425 +        // Get rid of whatever we don't need anymore.
   2426 +        while (first_necessary_scanline > stbir_info->ring_buffer_first_scanline)
   2427 +        {
   2428 +            if (stbir_info->ring_buffer_first_scanline >= 0 && stbir_info->ring_buffer_first_scanline < stbir_info->output_h) // only rows inside the output image are written
   2429 +            {
   2430 +                int output_row_start = stbir_info->ring_buffer_first_scanline * output_stride_bytes;
   2431 +                float* ring_buffer_entry = stbir__get_ring_buffer_entry(ring_buffer, stbir_info->ring_buffer_begin_index, ring_buffer_length);
   2432 +                stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, ring_buffer_entry, channels, alpha_channel, decode);
   2433 +                STBIR_PROGRESS_REPORT((float)stbir_info->ring_buffer_first_scanline / stbir_info->output_h);
   2434 +            }
   2435 +
   2436 +            if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline)
   2437 +            {
   2438 +                // We just popped the last scanline off the ring buffer.
   2439 +                // Reset it to the empty state.
   2440 +                stbir_info->ring_buffer_begin_index = -1;
   2441 +                stbir_info->ring_buffer_first_scanline = 0;
   2442 +                stbir_info->ring_buffer_last_scanline = 0;
   2443 +                break;
   2444 +            }
   2445 +            else
   2446 +            {
   2447 +                stbir_info->ring_buffer_first_scanline++;
   2448 +                stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries;
   2449 +            }
   2450 +        }
   2451 +    }
   2452 +}
   2453 +// Height-downsampling driver: for each input scanline (margins included), flush finished rows and accumulate it into the ring buffer.
   2454 +static void stbir__buffer_loop_downsample(stbir__info* stbir_info)
   2455 +{
   2456 +    int y;
   2457 +    float scale_ratio = stbir_info->vertical_scale;
   2458 +    int output_h = stbir_info->output_h;
   2459 +    float in_pixels_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(scale_ratio) / scale_ratio;
   2460 +    int pixel_margin = stbir_info->vertical_filter_pixel_margin;
   2461 +    int max_y = stbir_info->input_h + pixel_margin;
   2462 +
   2463 +    STBIR_ASSERT(!stbir__use_height_upsampling(stbir_info));
   2464 +
   2465 +    for (y = -pixel_margin; y < max_y; y++)
   2466 +    {
   2467 +        float out_center_of_in; // Center of the current in scanline in the out scanline space
   2468 +        int out_first_scanline, out_last_scanline;
   2469 +
   2470 +        stbir__calculate_sample_range_downsample(y, in_pixels_radius, scale_ratio, stbir_info->vertical_shift, &out_first_scanline, &out_last_scanline, &out_center_of_in);
   2471 +
   2472 +        STBIR_ASSERT(out_last_scanline - out_first_scanline + 1 <= stbir_info->ring_buffer_num_entries);
   2473 +
   2474 +        if (out_last_scanline < 0 || out_first_scanline >= output_h) // this scanline touches no output row
   2475 +            continue;
   2476 +
   2477 +        stbir__empty_ring_buffer(stbir_info, out_first_scanline); // flush rows before out_first_scanline
   2478 +
   2479 +        stbir__decode_and_resample_downsample(stbir_info, y);
   2480 +
   2481 +        // Load in new ones.
   2482 +        if (stbir_info->ring_buffer_begin_index < 0)
   2483 +            stbir__add_empty_ring_buffer_entry(stbir_info, out_first_scanline);
   2484 +
   2485 +        while (out_last_scanline > stbir_info->ring_buffer_last_scanline)
   2486 +            stbir__add_empty_ring_buffer_entry(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
   2487 +
   2488 +        // Now the horizontal buffer is ready to write to all ring buffer rows.
   2489 +        stbir__resample_vertical_downsample(stbir_info, y);
   2490 +    }
   2491 +
   2492 +    stbir__empty_ring_buffer(stbir_info, stbir_info->output_h); // flush whatever is still buffered
   2493 +}
   2494 +// Store the input/output dimensions and the channel count in info.
   2495 +static void stbir__setup(stbir__info *info, int input_w, int input_h, int output_w, int output_h, int channels)
   2496 +{
   2497 +    info->input_w = input_w;
   2498 +    info->input_h = input_h;
   2499 +    info->output_w = output_w;
   2500 +    info->output_h = output_h;
   2501 +    info->channels = channels;
   2502 +}
   2503 +// Record the s0,t0,s1,t1 region in info and set its scale/shift from transform if given, else from the region and image sizes.
   2504 +static void stbir__calculate_transform(stbir__info *info, float s0, float t0, float s1, float t1, float *transform)
   2505 +{
   2506 +    info->s0 = s0;
   2507 +    info->t0 = t0;
   2508 +    info->s1 = s1;
   2509 +    info->t1 = t1;
   2510 +
   2511 +    if (transform) // explicit values: { horizontal scale, vertical scale, horizontal shift, vertical shift }
   2512 +    {
   2513 +        info->horizontal_scale = transform[0];
   2514 +        info->vertical_scale   = transform[1];
   2515 +        info->horizontal_shift = transform[2];
   2516 +        info->vertical_shift   = transform[3];
   2517 +    }
   2518 +    else
   2519 +    {
   2520 +        info->horizontal_scale = ((float)info->output_w / info->input_w) / (s1 - s0); // reads the sizes stored in info, so those must be set first
   2521 +        info->vertical_scale = ((float)info->output_h / info->input_h) / (t1 - t0);
   2522 +
   2523 +        info->horizontal_shift = s0 * info->output_w / (s1 - s0);
   2524 +        info->vertical_shift = t0 * info->output_h / (t1 - t0);
   2525 +    }
   2526 +}
   2527 +// Store the per-axis filters in info, replacing a filter value of 0 with the default for that axis' scale direction.
   2528 +static void stbir__choose_filter(stbir__info *info, stbir_filter h_filter, stbir_filter v_filter)
   2529 +{
   2530 +    if (h_filter == 0) // 0 selects the default, chosen from info->horizontal_scale
   2531 +        h_filter = stbir__use_upsampling(info->horizontal_scale) ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE;
   2532 +    if (v_filter == 0) // 0 selects the default, chosen from info->vertical_scale
   2533 +        v_filter = stbir__use_upsampling(info->vertical_scale)   ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE;
   2534 +    info->horizontal_filter = h_filter;
   2535 +    info->vertical_filter = v_filter;
   2536 +}
   2537 +// Fill in the contributor counts and the byte size of every working buffer in info; returns the sum of those sizes.
   2538 +static stbir_uint32 stbir__calculate_memory(stbir__info *info)
   2539 +{
   2540 +    int pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
   2541 +    int filter_height = stbir__get_filter_pixel_width(info->vertical_filter, info->vertical_scale);
   2542 +
   2543 +    info->horizontal_num_contributors = stbir__get_contributors(info->horizontal_scale, info->horizontal_filter, info->input_w, info->output_w);
   2544 +    info->vertical_num_contributors   = stbir__get_contributors(info->vertical_scale  , info->vertical_filter  , info->input_h, info->output_h);
   2545 +
   2546 +    // One extra entry because floating point precision problems sometimes cause an extra to be necessary.
   2547 +    info->ring_buffer_num_entries = filter_height + 1;
   2548 +
   2549 +    info->horizontal_contributors_size = info->horizontal_num_contributors * sizeof(stbir__contributors);
   2550 +    info->horizontal_coefficients_size = stbir__get_total_horizontal_coefficients(info) * sizeof(float);
   2551 +    info->vertical_contributors_size = info->vertical_num_contributors * sizeof(stbir__contributors);
   2552 +    info->vertical_coefficients_size = stbir__get_total_vertical_coefficients(info) * sizeof(float);
   2553 +    info->decode_buffer_size = (info->input_w + pixel_margin * 2) * info->channels * sizeof(float); // one input row plus a margin on each side
   2554 +    info->horizontal_buffer_size = info->output_w * info->channels * sizeof(float);
   2555 +    info->ring_buffer_size = info->output_w * info->channels * info->ring_buffer_num_entries * sizeof(float);
   2556 +    info->encode_buffer_size = info->output_w * info->channels * sizeof(float);
   2557 +
   2558 +    STBIR_ASSERT(info->horizontal_filter != 0);
   2559 +    STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late
   2560 +    STBIR_ASSERT(info->vertical_filter != 0);
   2561 +    STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late
   2562 +
   2563 +    if (stbir__use_height_upsampling(info))
   2564 +        // The horizontal buffer is for when we're downsampling the height and we
   2565 +        // can't output the result of sampling the decode buffer directly into the
   2566 +        // ring buffers.
   2567 +        info->horizontal_buffer_size = 0;
   2568 +    else
   2569 +        // The encode buffer is to retain precision in the height upsampling method
   2570 +        // and isn't used when height downsampling.
   2571 +        info->encode_buffer_size = 0;
   2572 +
   2573 +    return info->horizontal_contributors_size + info->horizontal_coefficients_size
   2574 +        + info->vertical_contributors_size + info->vertical_coefficients_size
   2575 +        + info->decode_buffer_size + info->horizontal_buffer_size
   2576 +        + info->ring_buffer_size + info->encode_buffer_size;
   2577 +}
   2578 +
   2579 +static int stbir__resize_allocated(stbir__info *info,
   2580 +    const void* input_data, int input_stride_in_bytes,
   2581 +    void* output_data, int output_stride_in_bytes,
   2582 +    int alpha_channel, stbir_uint32 flags, stbir_datatype type,
   2583 +    stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace,
   2584 +    void* tempmem, size_t tempmem_size_in_bytes)
   2585 +{ // Validates the arguments, carves tempmem into the working buffers, computes the filters and runs the resize loop; returns 1 on success and 0 when a validation check fails.
   2586 +    size_t memory_required = stbir__calculate_memory(info); // also fills in the info->*_size fields that STBIR__NEXT_MEMPTR relies on below; note this runs before the channel/filter checks
   2587 +
   2588 +    int width_stride_input = input_stride_in_bytes ? input_stride_in_bytes : info->channels * info->input_w * stbir__type_size[type]; // a stride of 0 means tightly packed rows
   2589 +    int width_stride_output = output_stride_in_bytes ? output_stride_in_bytes : info->channels * info->output_w * stbir__type_size[type]; // a stride of 0 means tightly packed rows
   2590 +
   2591 +#ifdef STBIR_DEBUG_OVERWRITE_TEST
   2592 +#define OVERWRITE_ARRAY_SIZE 8
   2593 +    unsigned char overwrite_output_before_pre[OVERWRITE_ARRAY_SIZE];
   2594 +    unsigned char overwrite_tempmem_before_pre[OVERWRITE_ARRAY_SIZE];
   2595 +    unsigned char overwrite_output_after_pre[OVERWRITE_ARRAY_SIZE];
   2596 +    unsigned char overwrite_tempmem_after_pre[OVERWRITE_ARRAY_SIZE];
   2597 +
   2598 +    size_t begin_forbidden = width_stride_output * (info->output_h - 1) + info->output_w * info->channels * stbir__type_size[type]; // byte offset just past the last pixel of the last output row
   2599 +    memcpy(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE); // snapshot the bytes just outside output_data and tempmem; they are compared again after the resize
   2600 +    memcpy(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE);
   2601 +    memcpy(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE);
   2602 +    memcpy(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE);
   2603 +#endif
   2604 +
   2605 +    STBIR_ASSERT(info->channels >= 0);
   2606 +    STBIR_ASSERT(info->channels <= STBIR_MAX_CHANNELS);
   2607 +
   2608 +    if (info->channels < 0 || info->channels > STBIR_MAX_CHANNELS) // same condition as the asserts above, enforced at runtime as well
   2609 +        return 0;
   2610 +
   2611 +    STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
   2612 +    STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
   2613 +
   2614 +    if (info->horizontal_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
   2615 +        return 0;
   2616 +    if (info->vertical_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
   2617 +        return 0;
   2618 +
   2619 +    if (alpha_channel < 0) // negative alpha_channel: force both alpha flags on
   2620 +        flags |= STBIR_FLAG_ALPHA_USES_COLORSPACE | STBIR_FLAG_ALPHA_PREMULTIPLIED;
   2621 +
   2622 +    if (!(flags&STBIR_FLAG_ALPHA_USES_COLORSPACE) || !(flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)) { // unless both alpha flags are set, alpha_channel must index a real channel
   2623 +        STBIR_ASSERT(alpha_channel >= 0 && alpha_channel < info->channels);
   2624 +    }
   2625 +
   2626 +    if (alpha_channel >= info->channels)
   2627 +        return 0;
   2628 +
   2629 +    STBIR_ASSERT(tempmem);
   2630 +
   2631 +    if (!tempmem)
   2632 +        return 0;
   2633 +
   2634 +    STBIR_ASSERT(tempmem_size_in_bytes >= memory_required);
   2635 +
   2636 +    if (tempmem_size_in_bytes < memory_required)
   2637 +        return 0;
   2638 +
   2639 +    memset(tempmem, 0, tempmem_size_in_bytes); // zero the whole scratch area before it is carved into buffers
   2640 +
   2641 +    info->input_data = input_data;
   2642 +    info->input_stride_bytes = width_stride_input;
   2643 +
   2644 +    info->output_data = output_data;
   2645 +    info->output_stride_bytes = width_stride_output;
   2646 +
   2647 +    info->alpha_channel = alpha_channel;
   2648 +    info->flags = flags;
   2649 +    info->type = type;
   2650 +    info->edge_horizontal = edge_horizontal;
   2651 +    info->edge_vertical = edge_vertical;
   2652 +    info->colorspace = colorspace;
   2653 +
   2654 +    info->horizontal_coefficient_width   = stbir__get_coefficient_width  (info->horizontal_filter, info->horizontal_scale);
   2655 +    info->vertical_coefficient_width     = stbir__get_coefficient_width  (info->vertical_filter  , info->vertical_scale  );
   2656 +    info->horizontal_filter_pixel_width  = stbir__get_filter_pixel_width (info->horizontal_filter, info->horizontal_scale);
   2657 +    info->vertical_filter_pixel_width    = stbir__get_filter_pixel_width (info->vertical_filter  , info->vertical_scale  );
   2658 +    info->horizontal_filter_pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
   2659 +    info->vertical_filter_pixel_margin   = stbir__get_filter_pixel_margin(info->vertical_filter  , info->vertical_scale  );
   2660 +
   2661 +    info->ring_buffer_length_bytes = info->output_w * info->channels * sizeof(float); // bytes for output_w pixels stored as floats
   2662 +    info->decode_buffer_pixels = info->input_w + info->horizontal_filter_pixel_margin * 2; // input width plus the horizontal margin on both sides
   2663 +
   2664 +#define STBIR__NEXT_MEMPTR(current, newtype) (newtype*)(((unsigned char*)current) + current##_size)
   2665 +
   2666 +    info->horizontal_contributors = (stbir__contributors *) tempmem; // buffers are laid out back to back in tempmem: each pointer is the previous one advanced by that buffer's *_size field
   2667 +    info->horizontal_coefficients = STBIR__NEXT_MEMPTR(info->horizontal_contributors, float);
   2668 +    info->vertical_contributors = STBIR__NEXT_MEMPTR(info->horizontal_coefficients, stbir__contributors);
   2669 +    info->vertical_coefficients = STBIR__NEXT_MEMPTR(info->vertical_contributors, float);
   2670 +    info->decode_buffer = STBIR__NEXT_MEMPTR(info->vertical_coefficients, float);
   2671 +
   2672 +    if (stbir__use_height_upsampling(info))
   2673 +    {
   2674 +        info->horizontal_buffer = NULL;
   2675 +        info->ring_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
   2676 +        info->encode_buffer = STBIR__NEXT_MEMPTR(info->ring_buffer, float);
   2677 +
   2678 +        STBIR_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->encode_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes); // NOTE(review): requires tempmem_size_in_bytes to equal the computed layout exactly, although the check above only rejects a smaller size
   2679 +    }
   2680 +    else
   2681 +    {
   2682 +        info->horizontal_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
   2683 +        info->ring_buffer = STBIR__NEXT_MEMPTR(info->horizontal_buffer, float);
   2684 +        info->encode_buffer = NULL;
   2685 +
   2686 +        STBIR_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->ring_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes); // NOTE(review): same exact-size requirement as in the upsampling branch
   2687 +    }
   2688 +
   2689 +#undef STBIR__NEXT_MEMPTR
   2690 +
   2691 +    // This signals that the ring buffer is empty
   2692 +    info->ring_buffer_begin_index = -1;
   2693 +
   2694 +    stbir__calculate_filters(info->horizontal_contributors, info->horizontal_coefficients, info->horizontal_filter, info->horizontal_scale, info->horizontal_shift, info->input_w, info->output_w);
   2695 +    stbir__calculate_filters(info->vertical_contributors, info->vertical_coefficients, info->vertical_filter, info->vertical_scale, info->vertical_shift, info->input_h, info->output_h);
   2696 +
   2697 +    STBIR_PROGRESS_REPORT(0);
   2698 +
   2699 +    if (stbir__use_height_upsampling(info)) // same predicate that selected the buffer layout above
   2700 +        stbir__buffer_loop_upsample(info);
   2701 +    else
   2702 +        stbir__buffer_loop_downsample(info);
   2703 +
   2704 +    STBIR_PROGRESS_REPORT(1);
   2705 +
   2706 +#ifdef STBIR_DEBUG_OVERWRITE_TEST
   2707 +    STBIR_ASSERT(memcmp(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0); // the bytes snapshotted before the resize must be unchanged
   2708 +    STBIR_ASSERT(memcmp(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE) == 0);
   2709 +    STBIR_ASSERT(memcmp(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0);
   2710 +    STBIR_ASSERT(memcmp(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE) == 0);
   2711 +#endif
   2712 +
   2713 +    return 1;
   2714 +}
   2715 +
   2716 +
   2717 +static int stbir__resize_arbitrary(
   2718 +    void *alloc_context,
   2719 +    const void* input_data, int input_w, int input_h, int input_stride_in_bytes,
   2720 +    void* output_data, int output_w, int output_h, int output_stride_in_bytes,
   2721 +    float s0, float t0, float s1, float t1, float *transform,
   2722 +    int channels, int alpha_channel, stbir_uint32 flags, stbir_datatype type,
   2723 +    stbir_filter h_filter, stbir_filter v_filter,
   2724 +    stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace)
   2725 +{ // Common backend of the public stbir_resize* functions in this file: prepares a local stbir__info, allocates scratch memory, runs stbir__resize_allocated and frees the memory.
   2726 +    stbir__info info;
   2727 +    int result;
   2728 +    size_t memory_required;
   2729 +    void* extra_memory;
   2730 +
   2731 +    stbir__setup(&info, input_w, input_h, output_w, output_h, channels);
   2732 +    stbir__calculate_transform(&info, s0,t0,s1,t1,transform);
   2733 +    stbir__choose_filter(&info, h_filter, v_filter);
   2734 +    memory_required = stbir__calculate_memory(&info); // total scratch bytes for all working buffers
   2735 +    extra_memory = STBIR_MALLOC(memory_required, alloc_context);
   2736 +
   2737 +    if (!extra_memory) // allocation failed: report failure with 0
   2738 +        return 0;
   2739 +
   2740 +    result = stbir__resize_allocated(&info, input_data, input_stride_in_bytes,
   2741 +                                            output_data, output_stride_in_bytes,
   2742 +                                            alpha_channel, flags, type,
   2743 +                                            edge_horizontal, edge_vertical,
   2744 +                                            colorspace, extra_memory, memory_required);
   2745 +
   2746 +    STBIR_FREE(extra_memory, alloc_context); // scratch memory is released whether or not the resize succeeded
   2747 +
   2748 +    return result; // 1 on success, 0 on failure, as returned by stbir__resize_allocated
   2749 +}
   2750 +
   2751 +STBIRDEF int stbir_resize_uint8(     const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
   2752 +                                           unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
   2753 +                                     int num_channels)
   2754 +{ // STBIR_TYPE_UINT8 resize with fixed settings: NULL alloc context, region 0,0,1,1, no transform, alpha_channel -1, flags 0, default filters, clamped edges, linear colorspace.
   2755 +    return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
   2756 +        output_pixels, output_w, output_h, output_stride_in_bytes,
   2757 +        0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
   2758 +        STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR);
   2759 +}
   2760 +
   2761 +STBIRDEF int stbir_resize_float(     const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
   2762 +                                           float *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
   2763 +                                     int num_channels)
   2764 +{ // STBIR_TYPE_FLOAT resize with fixed settings: NULL alloc context, region 0,0,1,1, no transform, alpha_channel -1, flags 0, default filters, clamped edges, linear colorspace.
   2765 +    return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
   2766 +        output_pixels, output_w, output_h, output_stride_in_bytes,
   2767 +        0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_FLOAT, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
   2768 +        STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR);
   2769 +}
   2770 +
   2771 +STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
   2772 +                                           unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
   2773 +                                     int num_channels, int alpha_channel, int flags)
   2774 +{ // STBIR_TYPE_UINT8 resize in STBIR_COLORSPACE_SRGB with caller-supplied alpha_channel and flags; NULL alloc context, default filters and clamped edges are fixed.
   2775 +    return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
   2776 +        output_pixels, output_w, output_h, output_stride_in_bytes,
   2777 +        0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
   2778 +        STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_SRGB);
   2779 +}
   2780 +
   2781 +STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
   2782 +                                                    unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
   2783 +                                              int num_channels, int alpha_channel, int flags,
   2784 +                                              stbir_edge edge_wrap_mode)
   2785 +{ // Same fixed settings as stbir_resize_uint8_srgb, except that edge_wrap_mode is used for both the horizontal and the vertical edge.
   2786 +    return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
   2787 +        output_pixels, output_w, output_h, output_stride_in_bytes,
   2788 +        0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
   2789 +        edge_wrap_mode, edge_wrap_mode, STBIR_COLORSPACE_SRGB);
   2790 +}
   2791 +
   2792 +STBIRDEF int stbir_resize_uint8_generic( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
   2793 +                                               unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
   2794 +                                         int num_channels, int alpha_channel, int flags,
   2795 +                                         stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
   2796 +                                         void *alloc_context)
   2797 +{ // STBIR_TYPE_UINT8 resize where the caller picks the edge mode and filter (each applied to both axes), the colorspace and the alloc context; region is fixed to 0,0,1,1 with no transform.
   2798 +    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
   2799 +        output_pixels, output_w, output_h, output_stride_in_bytes,
   2800 +        0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, filter, filter,
   2801 +        edge_wrap_mode, edge_wrap_mode, space);
   2802 +}
   2803 +
   2804 +STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels  , int input_w , int input_h , int input_stride_in_bytes,
   2805 +                                               stbir_uint16 *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
   2806 +                                         int num_channels, int alpha_channel, int flags,
   2807 +                                         stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
   2808 +                                         void *alloc_context)
   2809 +{ // STBIR_TYPE_UINT16 resize where the caller picks the edge mode and filter (each applied to both axes), the colorspace and the alloc context; region is fixed to 0,0,1,1 with no transform.
   2810 +    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
   2811 +        output_pixels, output_w, output_h, output_stride_in_bytes,
   2812 +        0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT16, filter, filter,
   2813 +        edge_wrap_mode, edge_wrap_mode, space);
   2814 +}
   2815 +
   2816 +
   2817 +STBIRDEF int stbir_resize_float_generic( const float *input_pixels         , int input_w , int input_h , int input_stride_in_bytes,
   2818 +                                               float *output_pixels        , int output_w, int output_h, int output_stride_in_bytes,
   2819 +                                         int num_channels, int alpha_channel, int flags,
   2820 +                                         stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
   2821 +                                         void *alloc_context)
   2822 +{ // STBIR_TYPE_FLOAT resize where the caller picks the edge mode and filter (each applied to both axes), the colorspace and the alloc context; region is fixed to 0,0,1,1 with no transform.
   2823 +    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
   2824 +        output_pixels, output_w, output_h, output_stride_in_bytes,
   2825 +        0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_FLOAT, filter, filter,
   2826 +        edge_wrap_mode, edge_wrap_mode, space);
   2827 +}
   2828 +
   2829 +
   2830 +STBIRDEF int stbir_resize(         const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
   2831 +                                         void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
   2832 +                                   stbir_datatype datatype,
   2833 +                                   int num_channels, int alpha_channel, int flags,
   2834 +                                   stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
   2835 +                                   stbir_filter filter_horizontal,  stbir_filter filter_vertical,
   2836 +                                   stbir_colorspace space, void *alloc_context)
   2837 +{ // Resize with caller-chosen datatype, per-axis edge modes and filters, colorspace and alloc context; only the region (0,0,1,1) and the NULL transform are fixed.
   2838 +    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
   2839 +        output_pixels, output_w, output_h, output_stride_in_bytes,
   2840 +        0,0,1,1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
   2841 +        edge_mode_horizontal, edge_mode_vertical, space);
   2842 +}
   2843 +
   2844 +
   2845 +STBIRDEF int stbir_resize_subpixel(const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
   2846 +                                         void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
   2847 +                                   stbir_datatype datatype,
   2848 +                                   int num_channels, int alpha_channel, int flags,
   2849 +                                   stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
   2850 +                                   stbir_filter filter_horizontal,  stbir_filter filter_vertical,
   2851 +                                   stbir_colorspace space, void *alloc_context,
   2852 +                                   float x_scale, float y_scale,
   2853 +                                   float x_offset, float y_offset)
   2854 +{ // Like stbir_resize, but passes a transform built from the caller's scale and offset values instead of NULL.
   2855 +    float transform[4]; // packed in the order x_scale, y_scale, x_offset, y_offset
   2856 +    transform[0] = x_scale;
   2857 +    transform[1] = y_scale;
   2858 +    transform[2] = x_offset;
   2859 +    transform[3] = y_offset;
   2860 +    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
   2861 +        output_pixels, output_w, output_h, output_stride_in_bytes,
   2862 +        0,0,1,1,transform,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
   2863 +        edge_mode_horizontal, edge_mode_vertical, space);
   2864 +}
   2865 +
   2866 +STBIRDEF int stbir_resize_region(  const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
   2867 +                                         void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
   2868 +                                   stbir_datatype datatype,
   2869 +                                   int num_channels, int alpha_channel, int flags,
   2870 +                                   stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
   2871 +                                   stbir_filter filter_horizontal,  stbir_filter filter_vertical,
   2872 +                                   stbir_colorspace space, void *alloc_context,
   2873 +                                   float s0, float t0, float s1, float t1)
   2874 +{ // Like stbir_resize, but forwards the caller's s0,t0,s1,t1 region instead of the fixed 0,0,1,1; the transform stays NULL.
   2875 +    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
   2876 +        output_pixels, output_w, output_h, output_stride_in_bytes,
   2877 +        s0,t0,s1,t1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
   2878 +        edge_mode_horizontal, edge_mode_vertical, space);
   2879 +}
   2880 +
   2881 +#endif // STB_IMAGE_RESIZE_IMPLEMENTATION
   2882 +
   2883 +/*
   2884 +------------------------------------------------------------------------------
   2885 +This software is available under 2 licenses -- choose whichever you prefer.
   2886 +------------------------------------------------------------------------------
   2887 +ALTERNATIVE A - MIT License
   2888 +Copyright (c) 2017 Sean Barrett
   2889 +Permission is hereby granted, free of charge, to any person obtaining a copy of
   2890 +this software and associated documentation files (the "Software"), to deal in
   2891 +the Software without restriction, including without limitation the rights to
   2892 +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   2893 +of the Software, and to permit persons to whom the Software is furnished to do
   2894 +so, subject to the following conditions:
   2895 +The above copyright notice and this permission notice shall be included in all
   2896 +copies or substantial portions of the Software.
   2897 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   2898 +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   2899 +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   2900 +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   2901 +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   2902 +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   2903 +SOFTWARE.
   2904 +------------------------------------------------------------------------------
   2905 +ALTERNATIVE B - Public Domain (www.unlicense.org)
   2906 +This is free and unencumbered software released into the public domain.
   2907 +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
   2908 +software, either in source code form or as a compiled binary, for any purpose,
   2909 +commercial or non-commercial, and by any means.
   2910 +In jurisdictions that recognize copyright laws, the author or authors of this
   2911 +software dedicate any and all copyright interest in the software to the public
   2912 +domain. We make this dedication for the benefit of the public at large and to
   2913 +the detriment of our heirs and successors. We intend this dedication to be an
   2914 +overt act of relinquishment in perpetuity of all present and future rights to
   2915 +this software under copyright law.
   2916 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   2917 +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   2918 +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   2919 +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   2920 +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
   2921 +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   2922 +------------------------------------------------------------------------------
   2923 +*/