libzahl

big integer library
git clone git://git.suckless.org/libzahl
Log | Files | Refs | README | LICENSE

zmul.c (1628B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include "internals.h"
      3 
      4 
      5 static inline void
      6 zmul_ll_single_char(z_t a, z_t b, z_t c)
      7 {
      8 	ENSURE_SIZE(a, 1);
      9 	a->used = 1;
     10 	a->chars[0] = b->chars[0] * c->chars[0];
     11 	SET_SIGNUM(a, 1);
     12 }
     13 
     14 void
     15 zmul_ll(z_t a, z_t b, z_t c)
     16 {
     17 	/*
     18 	 * Karatsuba algorithm
     19 	 * 
     20 	 * Basically, this is how you were taught to multiply large numbers
     21 	 * by hand in school: 4010⋅3020 = (4000 + 10)(3000 + 20) =
     22 	 * = 40⋅30⋅10⁴ + (40⋅20 + 30⋅10)⋅10² + 10⋅20, but the middle is
     23 	 * optimised to only one multiplication:
     24 	 * 40⋅20 + 30⋅10 = (40 + 10)(30 + 20) − 40⋅30 − 10⋅20.
     25 	 * This optimisation is crucial. Without it, the algorithm with
     26 	 * run in O(n²).
     27 	 */
     28 
     29 #define z2 c_low
     30 #define z1 b_low
     31 #define z0 a
     32 	size_t m, m2;
     33 	z_t b_high, b_low, c_high, c_low;
     34 
     35 	if (unlikely(zzero1(b, c))) {
     36 		SET_SIGNUM(a, 0);
     37 		return;
     38 	}
     39 
     40 	m = zbits(b);
     41 	m2 = b == c ? m : zbits(c);
     42 
     43 	if (m + m2 <= BITS_PER_CHAR) {
     44 		zmul_ll_single_char(a, b, c);
     45 		return;
     46 	}
     47 
     48         m = MAX(m, m2);
     49 	m2 = m >> 1;
     50 
     51 	zinit_temp(b_high);
     52 	zinit_temp(b_low);
     53 	zinit_temp(c_high);
     54 	zinit_temp(c_low);
     55 
     56 	zsplit_pz(b_high, b_low, b, m2);
     57 	zsplit_pz(c_high, c_low, c, m2);
     58 
     59 
     60 	zmul_ll(z0, b_low, c_low);
     61 	zadd_unsigned_assign(b_low, b_high);
     62 	zadd_unsigned_assign(c_low, c_high);
     63 	zmul_ll(z1, b_low, c_low);
     64 	zmul_ll(z2, b_high, c_high);
     65 
     66 	zsub_nonnegative_assign(z1, z0);
     67 	zsub_nonnegative_assign(z1, z2);
     68 
     69 	zlsh(z1, z1, m2);
     70 	m2 <<= 1;
     71 	zlsh(z2, z2, m2);
     72 	zadd_unsigned_assign(a, z1);
     73 	zadd_unsigned_assign(a, z2);
     74 
     75 
     76 	zfree_temp(c_low);
     77 	zfree_temp(c_high);
     78 	zfree_temp(b_low);
     79 	zfree_temp(b_high);
     80 }