commit 2741eab8fcbe01af3972557e2df6e765a9450093
parent 94de8e2bd18743949bbb82d9e8cac109da4a15f3
Author: Mattias Andrée <maandree@kth.se>
Date: Mon, 8 May 2017 22:08:36 +0200
Improve performance and memory usage of blind-transpose
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat:
2 files changed, 33 insertions(+), 17 deletions(-)
diff --git a/man/blind-transpose.1 b/man/blind-transpose.1
@@ -12,8 +12,9 @@ To transpose a videos means to swap the
X and Y coordinates.
.SH REQUIREMENTS
.B blind-transpose
-requires enough free memory to load two full frames into
-memory. A frame requires 32 bytes per pixel it contains.
+requires enough free memory to load one full frame and
+one input column into memory. A frame requires 32 bytes
+per pixel it contains.
.B blind-transpose
has not been optimised for memory usage, but instead
for code simplicity.
diff --git a/src/blind-transpose.c b/src/blind-transpose.c
@@ -9,12 +9,31 @@
USAGE("")
+static size_t srcw, srch, srcwps, srchps, ps; /* file scope so the process_* helpers can read them; main sets srchps = srch * ps and srcwps = srcw * ps */
+
+#define PROCESS(TYPE) /* body of a process_* helper: gather one input column (starting at col) into one contiguous output row (row), copying each pixel in TYPE-sized units */\
+ do {\
+ size_t x, i, n = ps / sizeof(TYPE); /* n: number of TYPE-sized units per pixel */\
+ char *src, *img;\
+ for (x = 0; x < srchps; x += ps) { /* x: byte offset of the destination pixel within the output row */\
+ img = row + x;\
+ src = col + x * srcw; /* x is a multiple of ps, so x * srcw equals (x / ps) * srcwps: step down one input row per output pixel */\
+ for (i = 0; i < n; i++)\
+ ((TYPE *)img)[i] = ((TYPE *)src)[i];\
+ }\
+ } while (0)
+
+static void process_double(char *row, char *col) {PROCESS(double);} /* chosen by main when ps is a multiple of sizeof(double); NOTE(review): main's function-pointer declaration names these parameters (col, row), the reverse of the order here */
+static void process_float (char *row, char *col) {PROCESS(float);} /* chosen by main when ps is not a multiple of sizeof(double) but is a multiple of sizeof(float) */
+static void process_char (char *row, char *col) {PROCESS(char);} /* fallback: copies each pixel one char at a time */
+
int
main(int argc, char *argv[])
{
struct stream stream;
- char *buf, *row, *pix, *image, *imag, *img;
- size_t n, srcw, srch, srcwps, srchps, ps, x, y, i;
+ char *buf, *image;
+ size_t n, y;
+ void (*process)(char *col, char *row);
UNOFLAGS(argc);
@@ -27,23 +46,19 @@ main(int argc, char *argv[])
echeck_frame_size(stream.width, stream.height, stream.pixel_size, 0, stream.file);
n = stream.height * stream.width * (ps = stream.pixel_size);
- buf = emalloc(n);
- image = emalloc(n);
-
srchps = srch * ps;
srcwps = srcw * ps;
+ buf = emalloc(n);
+ image = emalloc(srchps);
+
+ process = !(ps % sizeof(double)) ? process_double :
+ !(ps % sizeof(float)) ? process_float : process_char;
+
while (eread_frame(&stream, buf, n)) {
- for (y = 0; y < srchps; y += ps) {
- imag = image + y;
- row = buf + y * srcw;
- for (x = 0; x < srcwps; x += ps) {
- img = imag + x * srch;
- pix = row + x;
- for (i = 0; i < ps; i++)
- img[i] = pix[i];
- }
+ for (y = 0; y < srcwps; y += ps) {
+ process(image, buf + y);
+ ewriteall(STDOUT_FILENO, image, srchps, "<stdout>");
}
- ewriteall(STDOUT_FILENO, image, n, "<stdout>");
}
free(buf);