Multiprecision Long Division: Knuth’s Algorithm D in JavaScript

Two posts ago I have talked about the “Hacker’s Delight” implementation of Prof. Donald Knuth’s long division algorithm D for finite integers and my solution for 26 bit long words. The port to JavaScript was quite plain. The pointer-juggling in main has been replaced by an Array, the integer division has been done with the help of a final Math.floor(), rightshifts by base had been replaced by division and the goto by a loop.

Didn’t work.

Failed a couple of times and without any rhyme or reason it seemed.

Debugging JavaScript is still not very comfortable despite large efforts by the Mozilla team, it is in many cases simpler to just print out the variable contents, especially when porting. The culprit here was the seemingly innocent helper function nlz that counts the zeros from MSB to first set bit (others may call it highbit) and is used to evaluate the distance of the first shift, the normalizing of the combatants.

It was the old problem with signed/unsigned integers in JavaScript. It is necessary here to make the input an unsigned integer by the obligatory unsigned right shift by zero x>>>0.

So here is the complete code ready to be c&p’ed into Firefox scratchpad or similar and run. Should print the same output as the implementation by “Hacker’s Delight”.

function nlz(x){
   var n = 0;
   if (x>>>0 == 0) return(32);
   if (x>>>0 <= 0x0000FFFF) {n = n +16; x = x <<16;}
   if (x>>>0 <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
   if (x>>>0 <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
   if (x>>>0 <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
   if (x>>>0 <= 0x7FFFFFFF) {n = n + 1;}
   return n|0;
}
function divmnu(q,r,u,v,m,n) {

   var b = 1<<MP_DIGIT_BIT>>>0; // Number base.
   var mask = b-1>>>0;          // Number mask b-1
   var un, vn;              // Normalized form of u, v.
   var qhat;                // Estimated quotient digit.
   var rhat;                // A remainder.
   var p;                   // Product of two digits.
   var s, i, j, t, k;

   if (m < n || n <= 0 || v[n-1] == 0){
      return MP_VAL;        // Return if invalid param.
   }

   if (n == 1) {                                    // Take care of
      k = 0;                                        // the case of a
      for (j = m - 1; j >= 0; j--) {                // single-digit
         q[j] = Math.floor((k*b + u[j])/v[0]);      // divisor here.
         k = (k*b + u[j]) - q[j]*v[0];
      }
      if (r != null) r[0] = k;
      return 0;
   }

   // Normalize by shifting v left just enough so that
   // its high-order bit is on, and shift u left the
   // same amount.  We may have to append a high-order
   // digit on the dividend; we do that unconditionally.

   s =  (nlz(v[n-1]) - 7);        // 0 <= s <= 25.
   vn = new Array(n);
   for (i = n - 1; i > 0; i--){
      vn[i] = ((v[i] << s)&mask) | ((v[i-1] >>> (MP_DIGIT_BIT-s))&mask);
   }
   vn[0] = (v[0] << s)&mask;
   un = new Array((m + 1));
   un[m] = (u[m-1] >>> (MP_DIGIT_BIT-s))&mask;
   for (i = m - 1; i > 0; i--)
      un[i] = ((u[i] << s)&mask) | ((u[i-1] >>> (MP_DIGIT_BIT-s))&mask);
   un[0] = (u[0] << s)&mask;
   for (j = m - n; j >= 0; j--) {       // Main loop.
      // Compute estimate qhat of q[j].
      qhat = Math.floor( (un[j+n]*b + un[j+n-1]) / vn[n-1] );
      rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1];

      while(true){
        if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2])
        { 
          qhat = qhat - 1;
          rhat = rhat + vn[n-1];
          if (rhat < b) continue;
        }
        break;
      }

      // Multiply and subtract.
      k = 0;
      for (i = 0; i < n; i++) {
         p = qhat*vn[i];
         t = un[i+j] - k - (p & mask);
         un[i+j] = t&mask;
         k = Math.floor(p/b) - Math.floor(t/b);
      }
      t = un[j+n] - k;
      un[j+n] = t;

      q[j] = qhat;              // Store quotient digit.
      if (t < 0) {              // If we subtracted too
         q[j] = q[j] - 1;       // much, add back.
         k = 0;
         for (i = 0; i < n; i++) {
            t = un[i+j] + vn[i] + k;
            un[i+j] = t&mask;
            k = Math.floor(t/b);
         }
         un[j+n] = un[j+n] + k;
      }
   } // End j.
   // If the caller wants the remainder, unnormalize
   // it and pass it back.
   if (r != null) {
      for (i = 0; i < n; i++)
         r[i] = ((un[i] >>> s)&mask) | ((un[i+1] << (MP_DIGIT_BIT-s))&mask);
      r[n-1] = (un[n-1] >>> s)&mask;
   }
   return 0;
}
Number.prototype.toHex32 = function(uppercase){
  var t     = this;
  var lower = "0123456789abcdef";
  var upper = "0123456789ABCDEF";
  var rcase = uppercase||false;
  var ret   = "";

  rcase = (rcase)?upper:lower;
  if(t == 0){return "0";}  
  for(var i = 0;i<8;i++){
    ret = rcase.charAt((t&0xF)) + ret;
    t >>>= 4;
  }
  return ret;
};

function dumpit(msg, n, v) {
   var i,s=msg + "";
   //console.log(msg);
   for (i = n-1; i >= 0; i--) s += v[i].toHex32()+",";
   console.log(s);
}

var errors;
function check(q, r,u,v,m, n,cq,cr) {
   var i, szq;

   szq = Math.max(m - n + 1, 1);
   for (i = 0; i < szq; i++) {
      if (q[i] != cq[i]) {
         errors = errors + 1;
         dumpit("Error, dividend u =", m, u);
         dumpit("       divisor  v =", n, v);
         dumpit("For quotient,  got:", m-n+1, q);
         dumpit("        Should get:", m-n+1, cq);
         return;
      }
   }
   for (i = 0; i < n; i++) {
      if (r[i] != cr[i]) {
         errors = errors + 1;
         dumpit("Error, dividend u =", m, u);
         dumpit("       divisor  v =", n, v);
         dumpit("For remainder, got:", n, r);
         dumpit("        Should get:", n, cr);
         return;
      }
   }
   return;
}


   var test = [
   // m, n, u...,          v...,          cq...,                                       cr....
/* 1 */ [1, 1, [3],             [0],             [1],                                      [1]],            // Error, divide by 0.
/* 2 */ [1, 2, [7],             [1,3],           [0],                                    [7,0]],          // Error, n > m.
/* 3 */ [2, 2, [0,0],           [1,0],           [0],                                    [0,0]],          // Error, incorrect remainder cr.
/* 4 */ [1, 1, [3],             [2],             [1],                                      [1]],
/* 5 */ [1, 1, [3],             [3],             [1],                                      [0]],
/* 6 */ [1, 1, [3],             [4],             [0],                                      [3]],
/* 7 */ [1, 1, [0],             [0x3ffffff],     [0],                                      [0]],
/* 8 */ [1, 1, [0x3ffffff],     [1],             [0x3ffffff],                              [0]],
/* 9 */ [1, 1, [0x3ffffff],     [0x3ffffff],     [1],                                      [0]],
/* 10 */[1, 1, [0x3ffffff],     [3],             [0x01555555],                             [0]],
/* 11 */[2, 1, [0x3ffffff,0x3ffffff], [1],      [0x3ffffff,0x3ffffff],                     [0]],
/* 12 */[2, 1, [0x3ffffff,0x3ffffff], [0x3ffffff], [1,1],                                  [0]],
/* 13 */[2, 1, [0x3ffffff,0x3fffffe], [0x3ffffff], [0x3ffffff,0],                          [0x3fffffe]],
/* 14 */[2, 1, [0x00005678,0x00001234], [0x00009abc],[0x007876ea,0],                       [0x3ea0]],   
/* 15 */[2, 2, [0,0],           [0,1],           [0],                                      [0,0]],
/* 16 */[2, 2, [0,7],           [0,3],           [2],                                      [0,1]],
/* 17 */[2, 2, [5,7],           [0,3],           [2],                                      [5,1]],
/* 18 */[2, 2, [0,6],           [0,2],           [3],                                      [0,0]],
/* 19 */[1, 1, [0x800000],  [0x400001], [0x00000001],                                      [0x3fffff]],
/* 20 */[2, 1, [0x00000000,0x800000], [0x400001],   [0x3ffffe0,0x00000001],                [0x00000020]],
/* 21 */[2, 2, [0x00000000,0x800000], [0x00000001,0x400000], [0x00000001],                 [0x3ffffff,0x3fffff]],
/* 22 */[2, 2, [0x0000789a,0x00bcde], [0x0000789a,0x00bcde],          [1],                 [0,0]],
/* 23 */[2, 2, [0x0000789b,0x0000bcde], [0x0000789a,0x0000bcde],      [1],                 [1,0]],
/* 24 */[2, 2, [0x00007899,0x0000bcde], [0x0000789a,0x0000bcde],      [0],                 [0x00007899,0x0000bcde]],
/* 25 */[2, 2, [0x0000ffff,0x0000ffff], [0x0000ffff,0x0000ffff],      [1],                 [0,0]],
/* 26 */[2, 2, [0x0000ffff,0x0000ffff], [0x00000000,0x00000001], [0x0000ffff],             [0x0000ffff,0]],
/* 27 */[3, 2, [0x000089ab,0x00004567,0x00000123], [0x00000000,0x00000001],   [0x00004567,0x00000123],                  [0x000089ab,0]],
/* 28 */[3, 2, [0x0,0xfffe,0x8000], [0xffff,0x8000],   [0x3ffffff,0x0],                    [0xffff,0x7fff]],                   // Shows that first qhat can = b + 1.
/* 29 */[3, 3, [0x3,0x0,0x800000], [0x1,0x0,0x200000],  [ 0x3],                            [0,0,0x200000]],                   // Adding back step req'd.
/* 30 */[3, 3, [0x3,0x0,0x00008000], [0x1,0x0,0x2000],  [ 0x3],                            [0,0,0x2000]],                     // Adding back step req'd.
/* 31 */[4, 3, [0,0,0x8000,0x7fff], [1,0,0x8000],  [ 0x3fff800,0],                         [0x800,0x3ffffff,0x7fff]],         // Add back req'd.
/* 32 */[4, 3, [0,0xfffe,0,0x8000], [0xffff,0,0x8000], [0x3ffffff,0],                      [0xffff,0x3ffffff,0x7fff]],        // Shows that mult-sub quantity cannot be treated as signed.
/* 33 */[4, 3, [0,0x3fffffe,0,0x800000], [0xffff,0,0x800000], [0x0,1],                     [0x0,0x3feffff,0x0]],              // Shows that mult-sub quantity cannot be treated as signed.
/* 34 */[4, 3, [0,0x3fffffe,0,0x800000], [0x3ffffff,0,0x800000], [0x3ffffff,0],            [0x3ffffff,0x3ffffff,0x07fffff]],  // Shows that mult-sub quantity cannot be treated as signed.
   ];


   var i, n, m, ncases, f,errors=0;
   var q=[], r=[];
   var u=[], v=[], cq=[], cr=[];

   console.log("divmnu:");
   i = 0;
   ncases = 0;

   while (i < test.length)
// comment out the while--line and set i = 21 to try
// the problem described above with an unchanged nlz() function
 {
      m = test[i][0];
      n = test[i][1];
      u = test[i][2];
      v = test[i][3];
      cq = test[i][4];
      cr = test[i][5];

      f = divmnu(q, r, u, v, m, n);
      if (f) {
         dumpit("Error return code for dividend u =", m, u);
         dumpit("                      divisor  v =", n, v);
         errors = errors + 1;
      }
      else
         check(q, r, u, v, m, n, cq, cr);
      i++;
      ncases = ncases + 1;
   }

  console.log(errors + " errors out of "+ncases+" cases; there should be 3.\n");

The function Number.prototype.toHex32 is used to emulate the fixed width %08x printf format.

Another way to get the place where the high-bit resides is the algorithm I used for the build-in library for Little:

Number.prototype.highBit = function(){
  if (this == 0)
    return 0;
  var bits = [0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000];
  var Shift = [1, 2, 4, 8, 16];
  var ret = 0, x = this>>>0;
  for (var i = bits.length -1; i >= 0; i--){
    if (x & bits[i]){
      x >>>= Shift[i];
      ret |= Shift[i];
    } 
  }
  return ret;
};

This functions counts from LSB and is hence off by one and reciprocal in contrast to nlz(). I have more use for that output than the kind nlz offers.
The line to change is 40 (probably, WordPress’ highlighter might have a different opinion) and goes from

s = nlz(v[n-1]) - 7;

to

s = 26 - ((v[n-1]).highBit()+1);

For the full implementation into my Bigint library it needs some checks and balances, of course. I follow the principle used by Tom de Denis in his Libtommath and split the actual algorithm from the checks&balances part.

The arithmetic principles for division are as foll…uhm, no that sounds awkward even for my highly insensible ears 😉

Let the division be \frac{a}{b} = q \times b + r

From this we have some shortcuts available:

\displaystyle{ \frac{a}{b} = \begin{cases} q = q \land r = r  &\text{if } a > 0 \land b > 0\\ q = -q \land r = -r  &\text{if } a  < 0 \land b > 0\\ q = -q \land r = r  &\text{if } a > 0 \land b < 0\\ q = q \land r = -r  &\text{if } a < 0 \land b > 0\end{cases} }

We can shorten these further to -q \text{ if } ab < 0 and -r \text{ if }  a < 0 .

The divison by 0 is undefined but the errors thrown are not. IEEE-745 has this to say:

7.3 Division by zero 7.30
The divideByZero exception shall be signaled if and only if an exact infinite result is defined for an operation on finite operands. The default result of divideByZero shall be an Inf correctly signed according to the operation:

  • For division, when the divisor is zero and the dividend is a finite non-zero
    number, the sign of the infinity is the exclusive OR of the operands’ signs (see 6.3)

As a mathematical expression:

\displaystyle{ a_s \veebar b_s = \begin{cases} 0 &\text{if } a_s < b_s \\ 1 &\text{otherwise } \end{cases} }

That means that the Infinity has a negative sign iff b < 0. Sounds curious but we have signed zeros in IEEE-754, so the return is only negative if the divisor is a negative zero.

That gives

Bigint.prototype.divrem = function(bint){
  var a = this.abs();
  var b = bint.abs();
  var q,r,ret;
  var qsign, rsign;

  /* Some checks (NaN, Inf) ommited */

  if(b.isZero()){
    return (b.sign == MP_NEG)?Bigint.NEGATIVE_INFINITY
                             :Bigint.POSITIVE_INFINITY;
  }

  // Single digit b
  if(b.used == 1){
    return a.divremInt(b.dp[0]);
  }

  // a < b
  if(a.cmp_mag(b) == MP_LT ){
    return [new Bigint(0),a];
  }

  qsign = ( (a.sign * b.sign) < 0)?MP_NEG:MP_ZPOS;
  rsign = (a.sign = MP_NEG)?MP_NEG:MP_ZPOS;

  ret = a.kdivrem(b);
  // long version
  q = ret[0];
  r = ret[1];
  q.sign = qsign;
  r.sign = rsign;
  /* 
      Arguments given to kdivrem() must be clean
      to avoid error checking here.
   */
  return [q,r]
};

A short sketch for kdivrem

Bigint.prototype.divrem = function(bint){
  var divmnu = function (q,r,u,v,m,n) {
     ...
   return [q,r];
  };
  var U = this.dp;
  var M = this.used;
  var V = bint.dp;
  var N = bint.used;
  var Q = new Bigint(0);
  var R = new Bigint(0);
  return divmnu(Q.dp,R.dp,U,V,M,N);
};

A lot of temporary memory is used but avoiding it would make the code quite complicated, too complicated for the condition “good readability”.

There are some general optimizations possible:

  1. division by 2^n \text{with } n \in\mathbb{Z} which are just shifts
  2. division by 3 (5) which uses an approximation first
  3. Divide & Conquer algorithms
  4. Barret’s algorithm
  5. Multiply with inverse (Newton’s method)

The first point is a must, item two is easy to implement, so I’ll do it, position three, Well, I tried one with libtommath but the gain was small and only with very large numbers and not even always—Barret’s algorithm and Newton’s method are fast[1] and relatively simple to implement on the other hand, so they win this contest.

I won’t treat school book addition and multiplication here, that is really left as an exercise for the reader 😉

Now that I have the basics, I’m going to use it.

  • exponentiation
  • squaring
  • (E)GCD
  • Square- and other roots
  • Some operations modulo x

I think I’ll treat multiplication in my next post. Squaring, Toom-Cook (incl. Karatsuba) and, if that thing doesn’t grow too big FFT multiplication. Not to forget operator balancing which can gain quite much for multiplication.

[1] Hasselström, Karl. “Fast Division of Large Integers.” (2003).

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s