libzahl

big integer library
git clone git://git.suckless.org/libzahl
Log | Files | Refs | README | LICENSE

zsqr.c (1377B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include "internals.h"
      3 
      4 
      5 static inline void
      6 zsqr_ll_single_char(z_t a, z_t b)
      7 {
      8 	ENSURE_SIZE(a, 1);
      9 	a->used = 1;
     10 	a->chars[0] = b->chars[0] * b->chars[0];
     11 	SET_SIGNUM(a, 1);
     12 }
     13 
     14 void
     15 zsqr_ll(z_t a, z_t b)
     16 {
     17 	/*
     18 	 * Karatsuba algorithm, optimised for equal factors.
     19 	 */
     20 
     21 #define z2 a
     22 	z_t z0, z1, high, low;
     23 	zahl_char_t auxchars[3 * ZAHL_FLUFF];
     24 	size_t bits;
     25 
     26 	bits = zbits(b);
     27 
     28 	if (bits <= BITS_PER_CHAR / 2) {
     29 		zsqr_ll_single_char(a, b);
     30 		return;
     31 	}
     32 
     33 	bits >>= 1;
     34 
     35 	/* Try to split only at a character level rather than a bit level.
     36 	 * Such splits are faster, even if bit-level is required, and do
     37 	 * not require auxiliary memory except for the bit-level split
     38 	 * which require constant auxiliary memory. */
     39 	if (bits < BITS_PER_CHAR) {
     40 		low->chars = auxchars;
     41 		high->chars = auxchars + ZAHL_FLUFF;
     42 		zsplit_unsigned_fast_small_auto(high, low, b, bits);
     43 	} else {
     44 		bits = TRUNCATE_TO_CHAR(bits);
     45 		zsplit_unsigned_fast_large_taint(high, low, b, bits);
     46 	}
     47 
     48 
     49 	if (unlikely(zzero(low))) {
     50 		zsqr_ll(z2, high);
     51 		zlsh(a, z2, bits << 1);
     52 	} else {
     53 		zinit_temp(z0);
     54 		zinit_temp(z1);
     55 
     56 		zsqr_ll(z0, low);
     57 
     58 		zmul_ll(z1, low, high);
     59 		zlsh(z1, z1, bits + 1);
     60 
     61 		zsqr_ll(z2, high);
     62 		zlsh(a, z2, bits << 1);
     63 
     64 		zadd_unsigned_assign(a, z1);
     65 		zadd_unsigned_assign(a, z0);
     66 
     67 		zfree_temp(z1);
     68 		zfree_temp(z0);
     69 	}
     70 }