# On the numerical evaluation of factorials IV

## Algorithms for Faster Multiplication

And finally I proudly present…uh, whatever… here are the Toom-Cook 2,3,(4) and FFT functions to speed up multiplication and squaring.

This function to divide a Bigdecimal by a Number is used in the Toom-Cook multiplication algorithms, so, as much as I am disapointed about forgetting it, it is listed here.

```Bigdecimal.prototype.divD = function(n){
var t    = this.digits;
var tlen = this.digits.length;
var tsgn = this.sign;
var bsgn = (n<0)?-1:1;
var ret  = new Bigdecimal();

var carry = 0;

ret.sign = (tsgn == bsgn)?1:-1;
ret.digits = new Array(tlen);
ret.used = tlen;
for(var i=tlen-1;i>=0;i--){
var temp  = (t[i]+carry)/n;
var ftemp = Math.floor(temp);
carry = 0;
if(temp != ftemp){
//carry += Math.BIGDECIMAL_BASE*Math.round((temp-ftemp)*n);
carry += (Math.BIGDECIMAL_BASE*carry +t[i]) %n
}
//ret.digits.unshift(ftemp);
ret.digits[i] = ftemp;
//ret.used++;
}
ret.clamp();
return ret;
};
```

The Toom-Cook 2 algorithm, better known as the Karatsuba algorithm.

```Bigdecimal.prototype.karatsuba = function(bdec){

var x0,x1,y0,y1,z0,z1,z2,temp,xy;

var tlen = this.used;
var blen = bdec.used;

x0 = new Bigdecimal();
x1 = new Bigdecimal();
y0 = new Bigdecimal();
y1 = new Bigdecimal();

var m = Math.min(tlen >> 1, blen >> 1);
if(m <= 20)return this.lowlevel_mul(bdec)
x1.digits = this.digits.slice(m,tlen);
x1.used = x1.digits.length;
x1.sign = this.sign;

x0.digits = this.digits.slice(0,m);
x0.used = x0.digits.length;
x0.sign = this.sign;

y1.digits = bdec.digits.slice(m,blen);
y1.used = y1.digits.length;
y1.sign = bdec.sign;

y0.digits = bdec.digits.slice(0,m);
y0.used = y0.digits.length;
y0.sign = bdec.sign;

z2 = x1.karatsuba(y1);
z0 = x0.karatsuba(y0);

// z1 = (x1 + x0)(y1 + y0) - z2 - z0
// z2 * B^(2*m) + z1 * B^m + z0

xy.sign = this.sign * bdec.sign;
return xy;
};
```

The Toom-Cook 3 algorithm.

```Bigdecimal.prototype.toomCookMul = function(bdec){
var a0,a1,a2,b0,b1,b2,b3,tmp1,tmp2,w0,w1,w2,w3,w4,c;
var tmp
var tlen = this.used;
var blen = bdec.used;

var tsign = (this.s<0)?-1:1;
var bsign = (this.s<0)?-1:1;
sign = tsign * bsign;

a0 = new Bigdecimal();
a1 = new Bigdecimal();
a2 = new Bigdecimal();

b0 = new Bigdecimal();
b1 = new Bigdecimal();
b2 = new Bigdecimal();

var m = Math.floor(Math.min(tlen,blen)/3);
if(m <= 40)return this.karatsuba(bdec)
a0.digits = this.digits.slice(0,m);        // a(mod l^B)
a0.used   = a0.digits.length;
a0.sign   = this.sign;

a1.digits = this.digits.slice(m,2*m);      // a/(l^B),   a1(mod l^B)
a1.used   = a1.digits.length;
a1.sign   = this.sign;

a2.digits = this.digits.slice(2*m,tlen);   // a/(l^(2B)),a2(mod l^B)
a2.used   = a2.digits.length;
a2.sign   = this.sign;
// same with second factor

b0.digits = bdec.digits.slice(0,m);        // b(mod l^B)
b0.used   = b0.digits.length;
b0.sign   = this.sign;

b1.digits = bdec.digits.slice(m,2*m);      // b/(l^B),   b1(mod l^B)
b1.used   = b1.digits.length;
b1.sign   = this.sign;

b2.digits = bdec.digits.slice(2*m,blen);   // b/(l^(2B)),b2(mod l^B)
b2.used   = b2.digits.length;
b2.sign   = this.sign;

w0 = a0.toomCookMul(b0);
w4 = a2.toomCookMul(b2);

//w1 = (a2 + 2(a1 + 2a0))        (b2 + 2(b1 + 2b0))

//w3 = (a0 + 2(a1 + 2a2))       (b0 + 2(b1 + 2b2))

//w2 = (a2 + a1 + a0)           (b2 + b1 + b0)

// solve
// 18a
w1 = w1.sub(w4) // c
// 18b
w3 = w3.sub(w0) // c
// 19a
w1 = w1.divD(2); // c
// 19b
w3 = w3.divD(2); //
// 20a,b
w2 = w2.sub(w0).sub(w4);   // c
// 21a
w1 = w1.sub(w2);          // c
// 21b
w3 = w3.sub(w2);          // c
// 22a,b
w1 = w1.sub(w0.mulD(8));        // c
// 22c,d
w3 = w3.sub(w4.mulD(8));        // c
// 23a,b,c
w2 = w2.mulD(3).sub(w1).sub(w3);          // c
// 24a
w1 = w1.sub(w2);          // c
// 24b

w3 = w3.sub(w2);          // c
//alert(tmp +" - "+ w2 +"-"+ w3)
// 25a
w1 = w1.divD(3);      // c
// 25b
w3 = w3.divD(3);      // c
// w0 = w0.dlShift(0*m)
w1 = w1.dShiftLeft(1*m);     // c
w2 = w2.dShiftLeft(2*m);     // c
w3 = w3.dShiftLeft(3*m);     // c
w4 = w4.dShiftLeft(4*m);     // c

c.sign = (sign<0)?-1:1;
return c;
};
```

The Toom-Cook 4 algorithm (not tested, therefore not used).

```Bigdecimal.prototype.toomCookMul4 = function(bdec){
var a0,a1,a2,a3,b0,b1,b2,b3,b4,c;
var S1,S2,S3,S4,S5,S6,S7;

var tlen = this.used;
var blen = bdec.used;

var tsign = (this.s<0)?-1:1;
var bsign = (this.s<0)?-1:1;
sign = tsign * bsign;

a0 = new Bigdecimal();
a1 = new Bigdecimal();
a2 = new Bigdecimal();
a3 = new Bigdecimal();

b0 = new Bigdecimal();
b1 = new Bigdecimal();
b2 = new Bigdecimal();
b3 = new Bigdecimal();

var m = Math.floor(Math.min(tlen,blen)/4);
if(m <= 40)return this.lowlevel_mul(bdec);
a0.digits = this.digits.slice(0,m);
a0.used   = a0.digits.length;
a0.sign   = this.sign;

a1.digits = this.digits.slice(m,2*m);
a1.used   = a1.digits.length;
a1.sign   = this.sign;

a2.digits = this.digits.slice(2*m,3*m);
a2.used   = a2.digits.length;
a2.sign   = this.sign;

a3.digits = this.digits.slice(3*m,tlen);
a3.used   = a3.digits.length;
a3.sign   = this.sign;

b0.digits = bdec.digits.slice(0,m);
b0.used   = b0.digits.length;
b0.sign   = bdec.sign;

b1.digits = bdec.digits.slice(m,2*m);
b1.used   = b1.digits.length;
b1.sign   = bdec.sign;

b2.digits = bdec.digits.slice(2*m,3*m);
b2.used   = b2.digits.length;
b2.sign   = bdec.sign;

b3.digits = bdec.digits.slice(3*m,blen);
b3.used   = b3.digits.length;
b3.sign   = bdec.sign;

S1 = a3.lowlevel_mul(b3);
//(8*a3+4*a2+2*a1+a0)*(8*b3+4*b2+2*b1+b0)
//PRINT("(8*"+a3+"+ 4*"+a2+"+ 2*"+a1+" +"+a0+" )*(8*"+b3+" +4*"+b2+" +2*"+b1+" +"+b0+") - " +S2 + "<br>")
//(+a3+a2+a1+a0)*(+b3+b2+b1+b0)
// (-a3+a2-a1+a0)*(-b3+b2-b1+b0)
// (+8*a0+4*a1+2*a2+a3)*(+8*b0+4*b1+2*b2+b3)
//(-8*a0+4*a1-2*a2+a3)*(-8*b0+4*b1-2*b2+b3)

S7 = a0.lowlevel_mul(b0);

S2 = S2.add(S5);//PRINT(S4+" - "+S3+" - "+S4.sub(S3)+"<br>")
S4 = S4.sub(S3);//PRINT(S6+" - "+S5+" - "+S6.sub(S5)+"<br>")
S6 = S6.sub(S5);//PRINT(S4+"/2 - "+S4.divD(2)+"<br>")
S4 = S4.divD(2);//PRINT(S5+" - "+S1+" - "+S5.sub(S1)+"<br>")
S5 = S5.sub(S1);//PRINT(S5+" - ("+S7+" * 64) - "+S5.sub((S7.mulD(64)))+"<br>")
S5 = S5.sub((S7.mulD(64)));//PRINT(S3+" + "+S4+" - "+S3.add(S4)+"<br>")
S5 = S5.add(S6);//PRINT(S2+" - ("+S3+" * 65) - "+S2.sub((S3.mulD(65)))+"<br>")
S2 = S2.sub((S3.mulD(65)));//PRINT(S3+" - "+S1+" - "+S3.sub(S1)+"<br>")
S3 = S3.sub(S1);//PRINT(S3+" - "+S7+" - "+S3.sub(S7)+"<br>")
S3 = S3.sub(S7);//PRINT(S4+"<br>"+S4.neg()+"<br>")
S4 = S4.neg();//PRINT(S6+"<br>"+S6.neg()+"<br>")
S6 = S6.neg();//PRINT(S2+" + ("+S3+" * 45) - "+S2.add((S3.mulD(45)))+"<br>")
S2 = S2.add((S3.mulD(45)));//PRINT(S5+" - ("+S3+" * 8) - "+S5.sub((S3.mulD(8)))+"<br>")
S5 = S5.sub((S3.mulD(8)));//PRINT(S5+"/24 - "+S5.divD(24)+"<br>")
S5 = S5.divD(24);
S6 = S6.sub(S2);
S2 = S2.sub((S4.mulD(16)));
S2 = S2.divD(18);
S3 = S3.sub(S5);
S4 = S4.sub(S2);
S6 = S6.divD(60);
S2 = S2.sub(S6);

S1 = S1.dShiftLeft(6*m);
S2 = S2.dShiftLeft(5*m);
S3 = S3.dShiftLeft(4*m);
S4 = S4.dShiftLeft(3*m);
S5 = S5.dShiftLeft(2*m);
S6 = S6.dShiftLeft(1*m);
//S7 = S7.dShiftLeft(0*m);
//PRINT(S1 +" + "+S2 +" + "+S3 +" + "+S4 +" + "+S5 +" + "+S6 +" + "+S7 +" - "+
c.sign = (sign<0)?-1:1;
return c
};
```

The long awaited FFT-multiplication. If that is not the most crude
implementation I’d eat my hat a banana.

```Bigdecimal.prototype.mulFFT  = function(bdec,base){
var nextPow2 = function(n){
var lg2 = Math.ceil(Math.log(n)*Math.LOG2E);
return Math.pow(2,lg2);
};
var ispow2 = function(x){
if(x<2)return false;
if(x&(x-1))return false;
return true;
};
var bitlength = function(x){
if(x<2) {return undefined;}
for(var i=0;i<52;i++){
if(x&(1<<i))return i;
}
};
var esrever = function(k,n){
var rev = 0;
for(var i=0;i<n;i++){
rev = (rev<<1)|(k&1);
k>>=1;
}
return rev;
};

var normalize = function(a){
var t     = a;
var tlen  = a.length;

var ret   = new Array(tlen);
var carry = 0;
for(var i=0;i<tlen;i++){
var temp = carry;
carry = 0;
temp += Math.round(t[i]);
if(temp >= Math.BIGDECIMAL_BASE){
carry = Math.floor(temp/Math.BIGDECIMAL_BASE);
temp  = temp % Math.BIGDECIMAL_BASE;
}
ret[i] = (temp);
}
if(carry)ret.push(carry);
//while(ret[tlen-1] == 0){ret.pop();tlen--;}
return ret;
};

var fftsimple = function(Ri, Ii,n,dir){
var nbits;
var bsize; var bend = 1;

var angle = 2.0 * Math.PI;
var tempreal, tempimaginary;
var Ro = new Array(Ri.length);
var Io = new Array(Ri.length);

var ar  = new Array(3);
var ai  = new Array(3);

if(!ispow2(n))return undefined;
if(dir)angle = -angle;

nbits = bitlength(n);
for(var i=0;i<n;i++){
j = esrever(i,nbits);
Ro[j] = Ri[i];
Io[j] = Ii[i];
}
for(bsize=2;bsize<=n;bsize<<=1){
var quotangle = angle/bsize;
var onesinangle = Math.sin ( -quotangle );
var twosinangle = Math.sin ( -2 * quotangle );
var onecosangle = Math.cos ( -quotangle );
var twocosangle = Math.cos ( -2 * quotangle );
var twiceonecosangle   = 2 * onecosangle;

for(var i=0;i<n;i+=bsize){
ar[2] = twocosangle;ar[1] = onecosangle;
ai[2] = twosinangle;ai[1] = onesinangle;
for(var j=i,l=0;l<bend;j++,l++ ){
ar[0] = twiceonecosangle*ar[1] - ar[2];
ar[2] = ar[1];ar[1] = ar[0];
ai[0] = twiceonecosangle*ai[1] - ai[2];
ai[2] = ai[1];ai[1] = ai[0];
var k = j + bend;
tempreal      = ar[0]*Ro[k] - ai[0]*Io[k];
tempimaginary = ar[0]*Io[k] + ai[0]*Ro[k];
Ro[k] = Ro[j] - tempreal;
Io[k] = Io[j] - tempimaginary;
Ro[j] += tempreal;
Io[j] += tempimaginary;
}
}
bend = bsize;
}
if(dir){
for(i=0;i<n;i++){
Ro[i] /= n;Io[i] /= n;
}
}
return [Ro,Io]
};
var fftconv = function(ARi,AIi,BRi,BIi,n){
var CRo = new Array(n);
var CIo = new Array(n);
for(var i=0;i<n;i++){
CRo[i] = ARi[i]*BRi[i]-AIi[i]*BIi[i];
CIo[i] = ARi[i]*BIi[i]+AIi[i]*BRi[i];
}
return [CRo,CIo];
};

var mulFFT = function(ARi,AIi,BRi,BIi,n){
var retlen = 2;
var ret;
var fft1;var fft2;var conv;

while(retlen<2*n)retlen*=2;
var r1 = new Array(retlen).memset(0);
var r2 = new Array(retlen).memset(0);
var r3 = new Array(retlen).memset(0);
var r4 = new Array(retlen).memset(0);

ARi = ARi.concat(r1);
AIi = AIi.concat(r2);
BRi = BRi.concat(r3);
BIi = BIi.concat(r4);

//for(i=n;i<retlen;i++){
//ARi.push(0);AIi.push(0);
//BRi.push(0);BIi.push(0);
//}
fft1   = fftsimple(ARi,AIi,retlen,false);
fft2   = fftsimple(BRi,BIi,retlen,false);
conv   = fftconv(fft1[0],fft1[1],fft2[0],fft2[1],retlen);
return   fftsimple(conv[0],conv[1],2*n,true);
};

var ret = new Bigdecimal();
var ti,bi;

var td   = this.dup();
var bd   = bdec.dup();

bd = bd.toString()
td = td.toString()

if(arguments.length == 2){
if(base > 5) return Number.NaN;
if(base == 5){
Math.BIGDECIMAL_BASE       = 100000;
Math.BIGDECIMAL_LOG10_BASE =      5;
}
else if(base == 4){
Math.BIGDECIMAL_BASE       = 10000;
Math.BIGDECIMAL_LOG10_BASE =     4;
}
else if(base == 3){
Math.BIGDECIMAL_BASE       = 1000;
Math.BIGDECIMAL_LOG10_BASE =    3;
}
else if(base == 2){
Math.BIGDECIMAL_BASE       = 100;
Math.BIGDECIMAL_LOG10_BASE =   2;
}
else if(base == 1){
Math.BIGDECIMAL_BASE       = 10;
Math.BIGDECIMAL_LOG10_BASE =  1;
}
else
return Number.NaN;
}
else{
Math.BIGDECIMAL_BASE       = 1000;
Math.BIGDECIMAL_LOG10_BASE =    3;
}

td = td.toBigdecimal()
bd = bd.toBigdecimal()

var t    = td.digits;
var tlen = td.used;

var b    = bd.digits;
var blen = bd.used;
if(tlen<blen){
var temp = new Array(blen-tlen).memset(0)
t = t.concat(temp);
tlen = blen;
}
if(blen<tlen){
var temp = new Array(tlen-blen).memset(0)
b = b.concat(temp);
blen = tlen;
}
var pwr2 = nextPow2(tlen);
var temp1 = new Array(pwr2-tlen).memset(0);
var temp2 = new Array(pwr2-blen).memset(0);
t = t.concat(temp1);
tlen = pwr2;
b = b.concat(temp2);
blen = tlen;

ti = new Array(tlen).memset(0);
bi = new Array(blen).memset(0);

ret.digits = normalize(mulFFT(t,ti,b,bi,tlen)[0]);
ret.used = ret.digits.length;
ret.clamp();
ret = ret.toString()

Math.BIGDECIMAL_BASE       = 10000000;
Math.BIGDECIMAL_LOG10_BASE =        7;

return ret.toBigdecimal();
};
```

The FFT-function above is only good for up to `Bigdecimal.FFT_CUTOFF` long Bigdecimals, so I used the Karatsuba algorithm to hack around in a simple way.

```Bigdecimal.prototype.mulWithPoorFFT = function(bdec,limit,base){
var x0,x1,y0,y1,z0,z1,z2,temp,xy;

var tlen = this.used;
var blen = bdec.used;

x0 = new Bigdecimal();
x1 = new Bigdecimal();
y0 = new Bigdecimal();
y1 = new Bigdecimal();

var m = Math.min(tlen >> 1, blen >> 1);
var M = Math.max(tlen, blen);
if(M < 5*Bigdecimal.FFT_CUTOFF)return this.mulFFT(bdec,base)
x1.digits = this.digits.slice(m,tlen);
x1.used = x1.digits.length;
x1.sign = this.sign;

x0.digits = this.digits.slice(0,m);
x0.used = x0.digits.length;
x0.sign = this.sign;

y1.digits = bdec.digits.slice(m,blen);
y1.used = y1.digits.length;
y1.sign = bdec.sign;

y0.digits = bdec.digits.slice(0,m);
y0.used = y0.digits.length;
y0.sign = bdec.sign;

z2 = x1.mulWithPoorFFT(y1,limit,base);
z0 = x0.mulWithPoorFFT(y0,limit,base);

// z1 = (x1 + x0)(y1 + y0) - z2 - z0

return xy;
}
```

```Bigdecimal.prototype.squareNormal = function(){
var t    = this.digits;
var tlen = this.used;
var ret  = new Bigdecimal();
var retv;
var uv = 0;
var carry = 0;
var i,j
ret.digits = new Array(2*tlen+1).memset(0);
ret.used = 2*tlen+1;
retv = ret.digits;

for(i=0;i<tlen;i++){
uv       = retv[2*i]+ t[i]*t[i];
retv[2*i] = uv%Math.BIGDECIMAL_BASE;             // v
carry    = Math.floor(uv/Math.BIGDECIMAL_BASE);  // u
for(j=i+1;j<tlen;j++){
uv       = retv[i+j]+2*t[j]*t[i]+carry;
retv[i+j] = uv%Math.BIGDECIMAL_BASE;            // v
carry    = Math.floor(uv/Math.BIGDECIMAL_BASE); // u
}
retv[i+j] = carry;                         // u
}
//  if(carry)ret.digits.push(carry);
ret.clamp();
return ret;
};
```

Karatsuba squaring. Developing stopped here for now, I found no time yet to port the other Toom-Cook algorithms to Javascript.

```Bigdecimal.prototype.karatsubaSquare = function(){

var x0,x1,z0,z2,xy;

var tlen = this.used;

x0 = new Bigdecimal();
x1 = new Bigdecimal();

var m = tlen >> 1;
if(m <= 20)return this.squareNormal();
x1.digits = this.digits.slice(m,tlen);
x1.used = x1.digits.length;

x0.digits = this.digits.slice(0,m);
x0.used = x0.digits.length;

z2 = x1.karatsubaSquare();
z0 = x0.karatsubaSquare();

xy.sign = 1;

return xy;
};
```

The Toom-Cook 3 algorithm for squaring.

```Bigdecimal.prototype.toomCookSquare = function(){
var a0,a1,a2,tmp1,w0,w1,w2,w3,w4,c;
var tmp
var tlen = this.used;

sign = 1;

a0 = new Bigdecimal();
a1 = new Bigdecimal();
a2 = new Bigdecimal();

var m = Math.floor(tlen/3);
if(m <= 40)return this.karatsubaSquare(bdec)
a0.digits = this.digits.slice(0,m);        // a(mod l^B)
a0.used   = a0.digits.length;
a0.sign   = this.sign;

a1.digits = this.digits.slice(m,2*m);      // a/(l^B),   a1(mod l^B)
a1.used   = a1.digits.length;
a1.sign   = this.sign;

a2.digits = this.digits.slice(2*m,tlen);   // a/(l^(2B)),a2(mod l^B)
a2.used   = a2.digits.length;
a2.sign   = this.sign;

w0 = a0.toomCookMulSquare();
w4 = a2.toomCookSquare();

tmp1 = tmp1.divD(2);

w1 = w1.sub(tmp1).sub(w3);

w2 = tmp1.sub(w4).sub(w0);

w1 = w1.dShiftLeft(1*m);     // c
w2 = w2.dShiftLeft(2*m);     // c
w3 = w3.dShiftLeft(3*m);     // c
w4 = w4.dShiftLeft(4*m);     // c

c.sign = 1;
return c;
};
```

The Toom-Cook 4 algorithm for squaring (not tested, therefore not used).

```Bigdecimal.prototype.toomCook4Square = function(){
var a0,a1,a2,a4,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6,tmp7,tmp8,
var w0,w1,w2,w3,w4,w5,w6,w7,c;

var tlen = this.used;

sign = 1;

a0 = new Bigdecimal();
a1 = new Bigdecimal();
a2 = new Bigdecimal();
a3 = new Bigdecimal();

var m = Math.floor(tlen/4);
if(m <= 40)return this.karatsubaSquare(bdec)
a0.digits = this.digits.slice(0,m);        // a(mod l^B)
a0.used   = a0.digits.length;
a0.sign   = this.sign;

a1.digits = this.digits.slice(m,2*m);      // a/(l^B),   a1(mod l^B)
a1.used   = a1.digits.length;
a1.sign   = this.sign;

a2.digits = this.digits.slice(2*m,3*m);   // a/(l^(2B)),a2(mod l^B)
a2.used   = a2.digits.length;
a2.sign   = this.sign;

a3.digits = this.digits.slice(3*m,tlen);
a3.used   = a3.digits.length;
a3.sign   = this.sign;

w1 = a0.toomCook4Square();

w5 = (a0.sub(a2)).mulD(2).mul(a1.sub(a3));

w6 = a3.mul(a2).mulD(2);

w7 = a3.toomCook4Square();

tmp1 = w3.add(w4);             // T1 = S3 + S4
tmp2 = (tmp1.add(w5)).divD(2); // T2 = (T1 + S5)/2
tmp3 = w2.add(w6);             // T3 = S2 + S6
tmp4 = tmp2.sub(tmp3);         // T4 = T2 - T3
tmp5 = tmp3.sub(w5);           // T5 = T3 - S5
tmp6 = tmp4.sub(w1);           // T6 = T4 - S3
tmp7 = tmp4.sub(w1);           // T7 = T4 - S1
tmp8 = tmp6.sub(w7);           // T8 = T6 - S7

w1 = w7.dShiftLeft(6*m);
w2 = w6.dShiftLeft(5*m);
w3 = tmp7.dShiftLeft(4*m);
w4 = tmp5.dShiftLeft(3*m);
w5 = tmp8.dShiftLeft(2*m);
w6 = w2.dShiftLeft(1*m);
w7 = w1;

c.sign = 1;
return c;
};
```

The FFT algorithm for squaring.

```Bigdecimal.prototype.squareFFT  = function(base){

var nextPow2 = function(n){
var lg2 = Math.ceil(Math.log(n)*Math.LOG2E);
return Math.pow(2,lg2);
};
var ispow2 = function(x){
if(x<2)return false;
if(x&(x-1))return false;
return true;
};
var bitlength = function(x){
if(x<2) {return undefined;}
for(var i=0;;i++){
if(x&(1<<i))return i;
}
};
var esrever = function(k,n){
var rev = 0;
for(var i=0;i<n;i++){
rev = (rev<<1)|(k&1);
k>>=1;
}
return rev;
};

var normalize = function(a){
var t     = a;
var tlen  = a.length;

var ret   = new Array();
var carry = 0;
for(var i=0;i<tlen;i++){
var temp = carry;
carry = 0;
temp += Math.round(t[i]);
if(temp >= Math.BIGDECIMAL_BASE){
carry = Math.floor(temp/Math.BIGDECIMAL_BASE);
temp  = temp % Math.BIGDECIMAL_BASE;
}
ret.push(temp);
}
if(carry)ret.push(carry);
while(ret[tlen-1] == 0){ret.pop();tlen--;}
return ret;
};

var fftsimple = function(Ri, Ii,n,dir){
var nbits;
var bsize; var bend = 1;

var angle = 2.0 * Math.PI;
var tempreal, tempimaginary;
var Ro = new Array(Ri.length);
var Io = new Array(Ri.length);

var ar  = new Array(3);
var ai  = new Array(3);

if(!ispow2(n))return undefined;
if(dir)angle = -angle;

nbits = bitlength(n);
for(var i=0;i<n;i++){
j = esrever(i,nbits);
Ro[j] = Ri[i];
Io[j] = Ii[i];
}
for(bsize=2;bsize<=n;bsize<<=1){
var quotangle = angle/bsize;
var onesinangle = Math.sin ( -quotangle );
var twosinangle = Math.sin ( -2 * quotangle );
var onecosangle = Math.cos ( -quotangle );
var twocosangle = Math.cos ( -2 * quotangle );
var twiceonecosangle   = 2 * onecosangle;

for(var i=0;i<n;i+=bsize){
ar[2] = twocosangle;ar[1] = onecosangle;
ai[2] = twosinangle;ai[1] = onesinangle;
for(var j=i,l=0;l<bend;j++,l++ ){
ar[0] = twiceonecosangle*ar[1] - ar[2];
ar[2] = ar[1];ar[1] = ar[0];
ai[0] = twiceonecosangle*ai[1] - ai[2];
ai[2] = ai[1];ai[1] = ai[0];
var k = j + bend;
tempreal      = ar[0]*Ro[k] - ai[0]*Io[k];
tempimaginary = ar[0]*Io[k] + ai[0]*Ro[k];
Ro[k] = Ro[j] - tempreal;
Io[k] = Io[j] - tempimaginary;
Ro[j] += tempreal;
Io[j] += tempimaginary;
}
}
bend = bsize;
}
if(dir){
for(i=0;i<n;i++){
Ro[i] /= n;Io[i] /= n;
}
}
return [Ro,Io]
};
var fftconv = function(ARi,AIi,BRi,BIi,n){
var CRo = new Array(n);
var CIo = new Array(n);
for(var i=0;i<n;i++){
CRo[i] = ARi[i]*BRi[i]-AIi[i]*BIi[i];
CIo[i] = ARi[i]*BIi[i]+AIi[i]*BRi[i];
}
return [CRo,CIo];
};

var mulFFT = function(ARi,AIi,n){
var retlen = 2;
var ret;
var fft1;var conv;

while(retlen<2*n)retlen*=2;
for(i=n;i<retlen;i++){
ARi.push(0);AIi.push(0);
}
fft1   = fftsimple(ARi,AIi,retlen,false);
conv   = fftconv(fft1[0],fft1[1],fft1[0],fft1[1],retlen);
return   fftsimple(conv[0],conv[1],2*n,true);
};

var ret = new Bigdecimal();
var ti,bi;

var td   = this.dup();

td = td.toString();

if(arguments.length == 2){
if(base > 5) return Number.NaN;
if(base == 5){
Math.BIGDECIMAL_BASE       = 100000;
Math.BIGDECIMAL_LOG10_BASE =      5;
}
else if(base == 4){
Math.BIGDECIMAL_BASE       = 10000;
Math.BIGDECIMAL_LOG10_BASE =     4;
}
else if(base == 3){
Math.BIGDECIMAL_BASE       = 1000;
Math.BIGDECIMAL_LOG10_BASE =    3;
}
else if(base == 2){
Math.BIGDECIMAL_BASE       = 100;
Math.BIGDECIMAL_LOG10_BASE =   2;
}
else if(base == 1){
Math.BIGDECIMAL_BASE       = 10;
Math.BIGDECIMAL_LOG10_BASE =  1;
}
else
return Number.NaN;
}
else{
Math.BIGDECIMAL_BASE       = 1000;
Math.BIGDECIMAL_LOG10_BASE =    3;
}

td = td.toBigdecimal();

var t    = td.digits;
var tlen = td.used;

var pwr2 = nextPow2(tlen);
var temp1 = new Array(pwr2-tlen).memset(0);
t = t.concat(temp1);
tlen = pwr2;

ti = new Array(tlen).memset(0);

ret.digits = normalize(mulFFT(t,ti,tlen)[0]);
ret.used = ret.digits.length;
ret = ret.toString()

Math.BIGDECIMAL_BASE       = 10000000;
Math.BIGDECIMAL_LOG10_BASE =        7;

return ret.toBigdecimal();
};
```

This is the same hack as with the normal FFT.

```Bigdecimal.prototype.squareWithPoorFFT = function(limit,base){
var x0,x1,x1x1,x0x0,t1,t2,b;

var tlen = this.used;

x0 = new Bigdecimal();
x1 = new Bigdecimal();

var m = (tlen >> 1);
var M = (tlen);
if(M <= limit)return this.squareFFT(base)
x1.digits = this.digits.slice(m,tlen);
x1.used = x1.digits.length;
x1.sign = this.sign;

x0.digits = this.digits.slice(0,m);
x0.used = x0.digits.length;
x0.sign = this.sign;

x1x1 = x1.squareWithPoorFFT(limit,base);
x0x0 = x0.squareWithPoorFFT(limit,base);

t1 = t1.squareWithPoorFFT(limit,base);

t1 = t1.sub(t2);

t1 = t1.dShiftLeft(m);
x1x1 = x1x1.dShiftLeft(2*m);

b.sign = 1;

return b;
};
```

The high-level squaring.

```Bigdecimal.prototype.square = function(){
if(this.used < 2*Bigdecimal.KARATSUBA_CUTOFF){
return this.squareNormal();
}
else if(this.used < 3*Bigdecimal.TOOM_COOK_CUTOFF){
return this.karatsubaSquare();
}
else if(this.used < 4*Bigdecimal.TOOM_COOK4_CUTOFF){
return this.toomCookSquare();
//}
//else if(this.used < 5*Bigdecimal.TOOM_COOK5_CUTOFF){
//return this.toomCook4Square();
//}
//else if(this.used < 5*Bigdecimal.FFT_CUTOFF){
//return this.toomCook5Square();
else
return this.squareFFT();
}

return this.squareFFT();
};
```

All that is poorly tested and may suffer heavily from C&P errors.
But it is a start.