Two posts ago I have talked about the “Hacker’s Delight” implementation of Prof. Donald Knuth’s long division algorithm D for finite integers and my solution for 26 bit long words. The port to JavaScript was quite plain. The pointer-juggling in `main`

has been replaced by an `Array`

, the integer division has been done with the help of a final `Math.floor()`

, rightshifts by base had been replaced by division and the `goto`

by a loop.

Didn’t work.

Failed a couple of times and without any rhyme or reason it seemed.

Debugging JavaScript is still not very comfortable despite large efforts by the Mozilla team, it is in many cases simpler to just print out the variable contents, especially when porting. The culprit here was the seemingly innocent helper function `nlz`

that counts the zeros from MSB to first set bit (others may call it highbit) and is used to evaluate the distance of the first shift, the normalizing of the combatants.

It was the old problem with signed/unsigned integers in JavaScript. It is necessary here to make the input an unsigned integer by the obligatory unsigned right shift by zero `x>>>0`

.

So here is the complete code ready to be c&p’ed into Firefox scratchpad or similar and run. Should print the same output as the implementation by “Hacker’s Delight”.

function nlz(x){ var n = 0; if (x>>>0 == 0) return(32); if (x>>>0 <= 0x0000FFFF) {n = n +16; x = x <<16;} if (x>>>0 <= 0x00FFFFFF) {n = n + 8; x = x << 8;} if (x>>>0 <= 0x0FFFFFFF) {n = n + 4; x = x << 4;} if (x>>>0 <= 0x3FFFFFFF) {n = n + 2; x = x << 2;} if (x>>>0 <= 0x7FFFFFFF) {n = n + 1;} return n|0; } function divmnu(q,r,u,v,m,n) { var b = 1<<MP_DIGIT_BIT>>>0; // Number base. var mask = b-1>>>0; // Number mask b-1 var un, vn; // Normalized form of u, v. var qhat; // Estimated quotient digit. var rhat; // A remainder. var p; // Product of two digits. var s, i, j, t, k; if (m < n || n <= 0 || v[n-1] == 0){ return MP_VAL; // Return if invalid param. } if (n == 1) { // Take care of k = 0; // the case of a for (j = m - 1; j >= 0; j--) { // single-digit q[j] = Math.floor((k*b + u[j])/v[0]); // divisor here. k = (k*b + u[j]) - q[j]*v[0]; } if (r != null) r[0] = k; return 0; } // Normalize by shifting v left just enough so that // its high-order bit is on, and shift u left the // same amount. We may have to append a high-order // digit on the dividend; we do that unconditionally. s = (nlz(v[n-1]) - 7); // 0 <= s <= 25. vn = new Array(n); for (i = n - 1; i > 0; i--){ vn[i] = ((v[i] << s)&mask) | ((v[i-1] >>> (MP_DIGIT_BIT-s))&mask); } vn[0] = (v[0] << s)&mask; un = new Array((m + 1)); un[m] = (u[m-1] >>> (MP_DIGIT_BIT-s))&mask; for (i = m - 1; i > 0; i--) un[i] = ((u[i] << s)&mask) | ((u[i-1] >>> (MP_DIGIT_BIT-s))&mask); un[0] = (u[0] << s)&mask; for (j = m - n; j >= 0; j--) { // Main loop. // Compute estimate qhat of q[j]. qhat = Math.floor( (un[j+n]*b + un[j+n-1]) / vn[n-1] ); rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1]; while(true){ if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2]) { qhat = qhat - 1; rhat = rhat + vn[n-1]; if (rhat < b) continue; } break; } // Multiply and subtract. k = 0; for (i = 0; i < n; i++) { p = qhat*vn[i]; t = un[i+j] - k - (p & mask); un[i+j] = t&mask; k = Math.floor(p/b) - Math.floor(t/b); } t = un[j+n] - k; un[j+n] = t; q[j] = qhat; // Store quotient digit. if (t < 0) { // If we subtracted too q[j] = q[j] - 1; // much, add back. k = 0; for (i = 0; i < n; i++) { t = un[i+j] + vn[i] + k; un[i+j] = t&mask; k = Math.floor(t/b); } un[j+n] = un[j+n] + k; } } // End j. // If the caller wants the remainder, unnormalize // it and pass it back. if (r != null) { for (i = 0; i < n; i++) r[i] = ((un[i] >>> s)&mask) | ((un[i+1] << (MP_DIGIT_BIT-s))&mask); r[n-1] = (un[n-1] >>> s)&mask; } return 0; } Number.prototype.toHex32 = function(uppercase){ var t = this; var lower = "0123456789abcdef"; var upper = "0123456789ABCDEF"; var rcase = uppercase||false; var ret = ""; rcase = (rcase)?upper:lower; if(t == 0){return "0";} for(var i = 0;i<8;i++){ ret = rcase.charAt((t&0xF)) + ret; t >>>= 4; } return ret; }; function dumpit(msg, n, v) { var i,s=msg + ""; //console.log(msg); for (i = n-1; i >= 0; i--) s += v[i].toHex32()+","; console.log(s); } var errors; function check(q, r,u,v,m, n,cq,cr) { var i, szq; szq = Math.max(m - n + 1, 1); for (i = 0; i < szq; i++) { if (q[i] != cq[i]) { errors = errors + 1; dumpit("Error, dividend u =", m, u); dumpit(" divisor v =", n, v); dumpit("For quotient, got:", m-n+1, q); dumpit(" Should get:", m-n+1, cq); return; } } for (i = 0; i < n; i++) { if (r[i] != cr[i]) { errors = errors + 1; dumpit("Error, dividend u =", m, u); dumpit(" divisor v =", n, v); dumpit("For remainder, got:", n, r); dumpit(" Should get:", n, cr); return; } } return; } var test = [ // m, n, u..., v..., cq..., cr.... /* 1 */ [1, 1, [3], [0], [1], [1]], // Error, divide by 0. /* 2 */ [1, 2, [7], [1,3], [0], [7,0]], // Error, n > m. /* 3 */ [2, 2, [0,0], [1,0], [0], [0,0]], // Error, incorrect remainder cr. /* 4 */ [1, 1, [3], [2], [1], [1]], /* 5 */ [1, 1, [3], [3], [1], [0]], /* 6 */ [1, 1, [3], [4], [0], [3]], /* 7 */ [1, 1, [0], [0x3ffffff], [0], [0]], /* 8 */ [1, 1, [0x3ffffff], [1], [0x3ffffff], [0]], /* 9 */ [1, 1, [0x3ffffff], [0x3ffffff], [1], [0]], /* 10 */[1, 1, [0x3ffffff], [3], [0x01555555], [0]], /* 11 */[2, 1, [0x3ffffff,0x3ffffff], [1], [0x3ffffff,0x3ffffff], [0]], /* 12 */[2, 1, [0x3ffffff,0x3ffffff], [0x3ffffff], [1,1], [0]], /* 13 */[2, 1, [0x3ffffff,0x3fffffe], [0x3ffffff], [0x3ffffff,0], [0x3fffffe]], /* 14 */[2, 1, [0x00005678,0x00001234], [0x00009abc],[0x007876ea,0], [0x3ea0]], /* 15 */[2, 2, [0,0], [0,1], [0], [0,0]], /* 16 */[2, 2, [0,7], [0,3], [2], [0,1]], /* 17 */[2, 2, [5,7], [0,3], [2], [5,1]], /* 18 */[2, 2, [0,6], [0,2], [3], [0,0]], /* 19 */[1, 1, [0x800000], [0x400001], [0x00000001], [0x3fffff]], /* 20 */[2, 1, [0x00000000,0x800000], [0x400001], [0x3ffffe0,0x00000001], [0x00000020]], /* 21 */[2, 2, [0x00000000,0x800000], [0x00000001,0x400000], [0x00000001], [0x3ffffff,0x3fffff]], /* 22 */[2, 2, [0x0000789a,0x00bcde], [0x0000789a,0x00bcde], [1], [0,0]], /* 23 */[2, 2, [0x0000789b,0x0000bcde], [0x0000789a,0x0000bcde], [1], [1,0]], /* 24 */[2, 2, [0x00007899,0x0000bcde], [0x0000789a,0x0000bcde], [0], [0x00007899,0x0000bcde]], /* 25 */[2, 2, [0x0000ffff,0x0000ffff], [0x0000ffff,0x0000ffff], [1], [0,0]], /* 26 */[2, 2, [0x0000ffff,0x0000ffff], [0x00000000,0x00000001], [0x0000ffff], [0x0000ffff,0]], /* 27 */[3, 2, [0x000089ab,0x00004567,0x00000123], [0x00000000,0x00000001], [0x00004567,0x00000123], [0x000089ab,0]], /* 28 */[3, 2, [0x0,0xfffe,0x8000], [0xffff,0x8000], [0x3ffffff,0x0], [0xffff,0x7fff]], // Shows that first qhat can = b + 1. /* 29 */[3, 3, [0x3,0x0,0x800000], [0x1,0x0,0x200000], [ 0x3], [0,0,0x200000]], // Adding back step req'd. /* 30 */[3, 3, [0x3,0x0,0x00008000], [0x1,0x0,0x2000], [ 0x3], [0,0,0x2000]], // Adding back step req'd. /* 31 */[4, 3, [0,0,0x8000,0x7fff], [1,0,0x8000], [ 0x3fff800,0], [0x800,0x3ffffff,0x7fff]], // Add back req'd. /* 32 */[4, 3, [0,0xfffe,0,0x8000], [0xffff,0,0x8000], [0x3ffffff,0], [0xffff,0x3ffffff,0x7fff]], // Shows that mult-sub quantity cannot be treated as signed. /* 33 */[4, 3, [0,0x3fffffe,0,0x800000], [0xffff,0,0x800000], [0x0,1], [0x0,0x3feffff,0x0]], // Shows that mult-sub quantity cannot be treated as signed. /* 34 */[4, 3, [0,0x3fffffe,0,0x800000], [0x3ffffff,0,0x800000], [0x3ffffff,0], [0x3ffffff,0x3ffffff,0x07fffff]], // Shows that mult-sub quantity cannot be treated as signed. ]; var i, n, m, ncases, f,errors=0; var q=[], r=[]; var u=[], v=[], cq=[], cr=[]; console.log("divmnu:"); i = 0; ncases = 0; while (i < test.length) // comment out the while--line and set i = 21 to try // the problem described above with an unchanged nlz() function { m = test[i][0]; n = test[i][1]; u = test[i][2]; v = test[i][3]; cq = test[i][4]; cr = test[i][5]; f = divmnu(q, r, u, v, m, n); if (f) { dumpit("Error return code for dividend u =", m, u); dumpit(" divisor v =", n, v); errors = errors + 1; } else check(q, r, u, v, m, n, cq, cr); i++; ncases = ncases + 1; } console.log(errors + " errors out of "+ncases+" cases; there should be 3.\n");

The function `Number.prototype.toHex32`

is used to emulate the fixed width `%08x`

`printf`

format.

Another way to get the place where the high-bit resides is the algorithm I used for the build-in library for Little:

Number.prototype.highBit = function(){ if (this == 0) return 0; var bits = [0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000]; var Shift = [1, 2, 4, 8, 16]; var ret = 0, x = this>>>0; for (var i = bits.length -1; i >= 0; i--){ if (x & bits[i]){ x >>>= Shift[i]; ret |= Shift[i]; } } return ret; };

This functions counts from LSB and is hence off by one and reciprocal in contrast to `nlz()`

. I have more use for that output than the kind `nlz`

offers.

The line to change is 40 (probably, WordPress’ highlighter might have a different opinion) and goes from

s = nlz(v[n-1]) - 7;

to

s = 26 - ((v[n-1]).highBit()+1);

For the full implementation into my Bigint library it needs some checks and balances, of course. I follow the principle used by Tom de Denis in his Libtommath and split the actual algorithm from the checks&balances part.

The arithmetic principles for division are as foll…uhm, no that sounds awkward even for my highly insensible ears 😉

Let the division be

From this we have some shortcuts available:

We can shorten these further to and .

The divison by 0 is undefined but the errors thrown are not. IEEE-745 has this to say:

7.3 Division by zero 7.30

The divideByZero exception shall be signaled if and only if an exact infinite result is defined for an operation on finite operands. The default result of divideByZero shall be an`Inf`

correctly signed according to the operation:

- For division, when the divisor is zero and the dividend is a finite non-zero

number, the sign of the infinity is the exclusive OR of the operands’ signs (see 6.3)

As a mathematical expression:

That means that the `Infinity`

has a negative sign iff . Sounds curious but we have signed zeros in IEEE-754, so the return is only negative if the divisor is a negative zero.

That gives

Bigint.prototype.divrem = function(bint){ var a = this.abs(); var b = bint.abs(); var q,r,ret; var qsign, rsign; /* Some checks (NaN, Inf) ommited */ if(b.isZero()){ return (b.sign == MP_NEG)?Bigint.NEGATIVE_INFINITY :Bigint.POSITIVE_INFINITY; } // Single digit b if(b.used == 1){ return a.divremInt(b.dp[0]); } // a < b if(a.cmp_mag(b) == MP_LT ){ return [new Bigint(0),a]; } qsign = ( (a.sign * b.sign) < 0)?MP_NEG:MP_ZPOS; rsign = (a.sign = MP_NEG)?MP_NEG:MP_ZPOS; ret = a.kdivrem(b); // long version q = ret[0]; r = ret[1]; q.sign = qsign; r.sign = rsign; /* Arguments given to kdivrem() must be clean to avoid error checking here. */ return [q,r] };

A short sketch for `kdivrem`

Bigint.prototype.divrem = function(bint){ var divmnu = function (q,r,u,v,m,n) { ... return [q,r]; }; var U = this.dp; var M = this.used; var V = bint.dp; var N = bint.used; var Q = new Bigint(0); var R = new Bigint(0); return divmnu(Q.dp,R.dp,U,V,M,N); };

A lot of temporary memory is used but avoiding it would make the code quite complicated, too complicated for the condition “good readability”.

There are some general optimizations possible:

- division by which are just shifts
- division by 3 (5) which uses an approximation first
- Divide & Conquer algorithms
- Barret’s algorithm
- Multiply with inverse (Newton’s method)

The first point is a must, item two is easy to implement, so I’ll do it, position three, Well, I tried one with libtommath but the gain was small and only with very large numbers and not even always—Barret’s algorithm and Newton’s method are fast[1] and relatively simple to implement on the other hand, so they win this contest.

I won’t treat school book addition and multiplication here, that is really left as an exercise for the reader 😉

Now that I have the basics, I’m going to use it.

- exponentiation
- squaring
- (E)GCD
- Square- and other roots
- Some operations modulo x
- …

I think I’ll treat multiplication in my next post. Squaring, Toom-Cook (incl. Karatsuba) and, if that thing doesn’t grow too big FFT multiplication. Not to forget operator balancing which can gain quite much for multiplication.

[1] Hasselström, Karl. “Fast Division of Large Integers.” (2003).