blind-multiply-matrices.c (3451B)
1 /* See LICENSE file for copyright and license details. */ 2 #ifndef TYPE 3 #include "common.h" 4 5 USAGE("[-en] leftmost-stream ... rightmost-stream") 6 7 static int equal = 0; 8 static size_t max_frame_size; 9 10 #define FILE "blind-multiply-matrices.c" 11 #include "define-functions.h" 12 13 int 14 main(int argc, char *argv[]) 15 { 16 struct stream *streams; 17 size_t n_streams, i, frames = 0; 18 int natural = 0, j; 19 char **rev_argv; 20 size_t max_width = 0, max_height = 0; 21 size_t width = 0, height = 0, w, h; 22 void (*process)(struct stream *streams, size_t n_streams); 23 24 ARGBEGIN { 25 case 'e': 26 equal = 1; 27 break; 28 case 'n': 29 natural = 1; 30 break; 31 default: 32 usage(); 33 } ARGEND; 34 35 if (argc < 2) 36 usage(); 37 38 if (natural) { 39 rev_argv = alloca((size_t)argc * sizeof(*rev_argv)); 40 for (j = 0; j < argc; j++) 41 rev_argv[j] = argv[argc - 1 - j]; 42 argv = rev_argv; 43 } 44 45 n_streams = (size_t)argc; 46 streams = ecalloc(n_streams, sizeof(*streams)); 47 48 for (i = 0; i < n_streams; i++) { 49 eopen_stream(streams + i, argv[i]); 50 if (streams[i].frames && streams[i].frames < frames) 51 frames = streams[i].frames; 52 if (streams->width > max_width) 53 max_width = streams->width; 54 if (streams->height > max_height) 55 max_height = streams->height; 56 } 57 for (i = 1; i < n_streams; i++) 58 if (strcmp(streams->pixfmt, streams[i].pixfmt)) 59 eprintf("videos use incompatible pixel formats\n"); 60 61 width = streams[n_streams - 1].width; 62 height = streams[n_streams - 1].height; 63 for (i = n_streams - 1; i--;) { 64 if (streams[i].width != height) 65 eprintf("videos do not have the compatible geometry\n"); 66 height = streams[i].height; 67 } 68 69 SELECT_PROCESS_FUNCTION(streams); 70 CHECK_N_CHAN(streams, 1, 4); 71 72 w = streams->width, streams->width = max_width; 73 h = streams->height, streams->height = max_height; 74 echeck_dimensions(streams, WIDTH | HEIGHT, NULL); 75 streams->width = width; 76 streams->height = height; 77 streams->frames = frames; 78 fprint_stream_head(stdout, streams); 79 streams->width = w; 80 streams->height = h; 81 efflush(stdout, "<stdout>"); 82 max_frame_size = max_width * max_height * streams->pixel_size; 83 84 process(streams, n_streams); 85 86 free(streams); 87 return 0; 88 } 89 90 #else 91 92 static void 93 PROCESS(struct stream *streams, size_t n_streams) 94 { 95 typedef TYPE pixel_t[4]; 96 pixel_t *res, *left, *right, *tmp; 97 size_t i, j, w, h, h2, x, y, k, r; 98 res = emalloc(max_frame_size); 99 left = emalloc(max_frame_size); 100 right = emalloc(max_frame_size); 101 102 while (eread_frame(streams + (n_streams - 1), res)) { 103 w = streams[n_streams - 1].width; 104 h = streams[n_streams - 1].height; 105 for (i = n_streams - 1; i--;) { 106 tmp = res, res = right, right = tmp; 107 if (!eread_frame(streams + i, left)) 108 goto done; 109 h2 = streams[i].height; 110 memset(res, 0, w * h2 * streams->pixel_size); 111 112 /* XXX Is there any significant performance to be gained by transposing `right`? */ 113 if (equal) { 114 for (y = r = 0; y < h2; y++) { 115 for (x = 0; x < w; x++, r++) { 116 for (k = 0; k < h; k++) 117 res[r][0] += left[y * h + k][0] * right[k * w + x][0]; 118 for (j = 1; j < streams->n_chan; j++) 119 res[r][j] = res[r][0]; 120 } 121 } 122 } else { 123 for (y = r = 0; y < h2; y++) 124 for (x = 0; x < w; x++, r++) 125 for (k = 0; k < h; k++) 126 for (j = 0; j < streams->n_chan; j++) 127 res[r][j] += left[y * h + k][j] * right[k * w + x][j]; 128 } 129 130 h = h2; 131 } 132 ewriteall(STDOUT_FILENO, res, streams->frame_size, "<stdout>"); 133 } 134 135 done: 136 free(res); 137 free(left); 138 free(right); 139 } 140 141 #endif