zmul.c (1628B)
1 /* See LICENSE file for copyright and license details. */ 2 #include "internals.h" 3 4 5 static inline void 6 zmul_ll_single_char(z_t a, z_t b, z_t c) 7 { 8 ENSURE_SIZE(a, 1); 9 a->used = 1; 10 a->chars[0] = b->chars[0] * c->chars[0]; 11 SET_SIGNUM(a, 1); 12 } 13 14 void 15 zmul_ll(z_t a, z_t b, z_t c) 16 { 17 /* 18 * Karatsuba algorithm 19 * 20 * Basically, this is how you were taught to multiply large numbers 21 * by hand in school: 4010⋅3020 = (4000 + 10)(3000 + 20) = 22 * = 40⋅30⋅10⁴ + (40⋅20 + 30⋅10)⋅10² + 10⋅20, but the middle is 23 * optimised to only one multiplication: 24 * 40⋅20 + 30⋅10 = (40 + 10)(30 + 20) − 40⋅30 − 10⋅20. 25 * This optimisation is crucial. Without it, the algorithm with 26 * run in O(n²). 27 */ 28 29 #define z2 c_low 30 #define z1 b_low 31 #define z0 a 32 size_t m, m2; 33 z_t b_high, b_low, c_high, c_low; 34 35 if (unlikely(zzero1(b, c))) { 36 SET_SIGNUM(a, 0); 37 return; 38 } 39 40 m = zbits(b); 41 m2 = b == c ? m : zbits(c); 42 43 if (m + m2 <= BITS_PER_CHAR) { 44 zmul_ll_single_char(a, b, c); 45 return; 46 } 47 48 m = MAX(m, m2); 49 m2 = m >> 1; 50 51 zinit_temp(b_high); 52 zinit_temp(b_low); 53 zinit_temp(c_high); 54 zinit_temp(c_low); 55 56 zsplit_pz(b_high, b_low, b, m2); 57 zsplit_pz(c_high, c_low, c, m2); 58 59 60 zmul_ll(z0, b_low, c_low); 61 zadd_unsigned_assign(b_low, b_high); 62 zadd_unsigned_assign(c_low, c_high); 63 zmul_ll(z1, b_low, c_low); 64 zmul_ll(z2, b_high, c_high); 65 66 zsub_nonnegative_assign(z1, z0); 67 zsub_nonnegative_assign(z1, z2); 68 69 zlsh(z1, z1, m2); 70 m2 <<= 1; 71 zlsh(z2, z2, m2); 72 zadd_unsigned_assign(a, z1); 73 zadd_unsigned_assign(a, z2); 74 75 76 zfree_temp(c_low); 77 zfree_temp(c_high); 78 zfree_temp(b_low); 79 zfree_temp(b_high); 80 }