module.exports = function buildTomCook(module, _prefix) {
    const prefix = _prefix || "tomcook";

    /**
     * Registers `<prefix>_mul1(x, y, r)`.
     *
     * The generated code reads three 32-bit words from `x` and three from `y`,
     * treats each operand as a degree-2 polynomial, evaluates both at the
     * points 0, 1, -1, -2 and "infinity", multiplies point-wise, interpolates
     * the five product coefficients s0..s4 and finally writes six words to
     * `r`, propagating carries in 29-bit steps (mask 0x1FFFFFFF, shift 29).
     */
    function buildMul1() {
        const f = module.addFunction(prefix+"_mul1");
        f.addParam("x", "i32");
        f.addParam("y", "i32");
        f.addParam("r", "i32");

        // All scratch values are i64; declaration order is kept stable so the
        // local indices of the generated function do not change.
        [
            "m0", "m1", "m2", "m02",
            "p0", "p1", "pn1", "pn2", "pi",
            "q0", "q1", "qn1", "qn2", "qi",
            "r0", "r1", "rn1", "rn2", "ri",
            "s0", "s1", "s2", "s3", "s4",
            "c",
        ].forEach((name) => f.addLocal(name, "i64"));

        const c = f.getCodeBuilder();

        // Small expression helpers over named locals.
        const get = (name) => c.getLocal(name);
        const set = (name, code) => c.setLocal(name, code);
        const add = (a, b) => c.i64_add(get(a), get(b));
        const sub = (a, b) => c.i64_sub(get(a), get(b));
        const mul = (a, b) => c.i64_mul(get(a), get(b));

        // Loads the three words of an operand into m0, m1, m2.
        const loadWords = (ptr) => [
            set("m0", c.i64_load32_u(get(ptr), 0)),
            set("m1", c.i64_load32_u(get(ptr), 4)),
            set("m2", c.i64_load32_u(get(ptr), 8)),
        ];

        // Value at -2 derived from the value at -1: ((v(-1) + m2) << 1) - m0.
        const evalAtMinus2 = (atMinus1) => c.i64_sub(
            c.i64_shl(add(atMinus1, "m2"), c.i64_const(1)),
            get("m0")
        );

        // Recomposition helpers: low 29 bits of the accumulator go to memory,
        // the rest is carried into the next coefficient.
        const carryOut = () => c.i64_shr_u(get("c"), c.i64_const(29));
        const storeLow29 = (offset) => c.i64_store32(
            get("r"),
            offset,
            c.i64_and(get("c"), c.i64_const(0x1FFFFFFF))
        );
        const carryInto = (coef) => set("c", c.i64_add(carryOut(), get(coef)));

        f.addCode(
            // Evaluate the first operand (p) at 0, 1, -1, -2, infinity.
            ...loadWords("x"),
            set("m02", add("m0", "m2")),
            set("p0", get("m0")),
            set("p1", add("m02", "m1")),
            set("pn1", sub("m02", "m1")),
            set("pn2", evalAtMinus2("pn1")),
            set("pi", get("m2")),

            // Evaluate the second operand (q) and interleave the point-wise
            // products r = p * q as soon as each q value is available.
            ...loadWords("y"),
            set("m02", add("m0", "m2")),
            set("q0", get("m0")),
            set("r0", mul("p0", "q0")),
            set("q1", add("m02", "m1")),
            set("r1", mul("p1", "q1")),
            set("qn1", sub("m02", "m1")),
            set("rn1", mul("pn1", "qn1")),
            set("qn2", evalAtMinus2("qn1")),
            set("rn2", mul("pn2", "qn2")),
            set("qi", get("m2")),
            set("ri", mul("pi", "qi")),

            // Interpolation: statement order matters, later lines read the
            // intermediate s-values written by earlier ones.
            set("s0", get("r0")),
            set("s4", get("ri")),
            set("s3", c.i64_div_s(sub("rn2", "r1"), c.i64_const(3))),
            set("s1", c.i64_shr_s(sub("r1", "rn1"), c.i64_const(1))),
            set("s2", sub("rn1", "r0")),
            set("s3", c.i64_add(
                c.i64_shr_s(sub("s2", "s3"), c.i64_const(1)),
                c.i64_shl(get("ri"), c.i64_const(1))
            )),
            set("s2", c.i64_add(sub("s2", "s4"), get("s1"))),
            set("s1", sub("s1", "s3")),

            // Recomposition: six output words at offsets 0..20.
            set("c", get("s0")),
            storeLow29(0),
            carryInto("s1"),
            storeLow29(4),
            carryInto("s2"),
            storeLow29(8),
            carryInto("s3"),
            storeLow29(12),
            carryInto("s4"),
            storeLow29(16),
            set("c", carryOut()),
            storeLow29(20)
        );
    }

    /**
     * Registers `<prefix>_mul<n>(x, y, r)`, a sign dispatcher.
     *
     * It tests bit 31 of the word at byte offset (n-1)*4 of each operand and
     * forwards to `_mulnn<n>` (both set), `_mulnp<n>` (exactly one set, the
     * operand with the bit set is passed first) or `_mulu<n>` (neither set).
     *
     * @param {number} n number of 32-bit words per operand
     */
    function buildMul(n) {
        const f = module.addFunction(prefix+"_mul"+n);
        f.addParam("x", "i32");
        f.addParam("y", "i32");
        f.addParam("r", "i32");
        // The previously declared i32 local "s" was never read or written
        // and has been dropped.

        const c = f.getCodeBuilder();

        const topWordOffset = (n-1)*4;
        const signBitOf = (operand) => c.i32_and(
            c.i32_load(c.getLocal(operand), topWordOffset),
            c.i32_const(0x80000000)
        );
        const forwardTo = (variant, first, second) => [
            ...c.call(
                prefix + "_mul" + variant + n,
                c.getLocal(first),
                c.getLocal(second),
                c.getLocal("r")
            ),
        ];

        f.addCode(
            c.if(
                signBitOf("x"),
                c.if(
                    signBitOf("y"),
                    forwardTo("nn", "x", "y"),
                    forwardTo("np", "x", "y")
                ),
                c.if(
                    signBitOf("y"),
                    // Only y has the sign bit: swap so it is passed first.
                    forwardTo("np", "y", "x"),
                    forwardTo("u", "x", "y")
                )
            )
        );
    }

    /**
     * Registers `<prefix>_mulu3(x, y, r)`.
     *
     * Schoolbook 3x3 word multiplication with no sign handling: the column
     * sums a_i*b_j are accumulated in an i64, the low 29 bits of each column
     * are stored and the rest is carried with an unsigned shift. Six words
     * are written to `r`; the last one is the final carry, unmasked.
     */
    function buildMulU3() {
        const f = module.addFunction(prefix+"_mulu3");
        f.addParam("x", "i32");
        f.addParam("y", "i32");
        f.addParam("r", "i32");
        ["a0", "a1", "a2", "b0", "b1", "b2", "c"].forEach((name) => f.addLocal(name, "i64"));

        const c = f.getCodeBuilder();

        const get = (name) => c.getLocal(name);
        const set = (name, code) => c.setLocal(name, code);
        const mul = (a, b) => c.i64_mul(get(a), get(b));
        const word = (ptr, offset) => c.i64_load32_u(get(ptr), offset);
        const carry = () => c.i64_shr_u(get("c"), c.i64_const(29));
        const storeLow29 = (offset) => c.i64_store32(
            get("r"),
            offset,
            c.i64_and(get("c"), c.i64_const(0x1FFFFFFF))
        );

        f.addCode(
            // Column 0: a0*b0
            set("a0", word("x", 0)),
            set("b0", word("y", 0)),
            set("c", mul("a0", "b0")),
            storeLow29(0),

            // Column 1: carry + a0*b1 + a1*b0
            set("a1", word("x", 4)),
            set("b1", word("y", 4)),
            set("c", c.i64_add(
                carry(),
                c.i64_add(mul("a0", "b1"), mul("a1", "b0"))
            )),
            storeLow29(4),

            // Column 2: a0*b2 + a2*b0 + carry + a1*b1
            set("a2", word("x", 8)),
            set("b2", word("y", 8)),
            set("c", c.i64_add(
                c.i64_add(mul("a0", "b2"), mul("a2", "b0")),
                c.i64_add(carry(), mul("a1", "b1"))
            )),
            storeLow29(8),

            // Column 3: carry + a1*b2 + a2*b1
            set("c", c.i64_add(
                c.i64_add(carry(), mul("a1", "b2")),
                mul("a2", "b1")
            )),
            storeLow29(12),

            // Column 4: a2*b2 + carry
            set("c", c.i64_add(mul("a2", "b2"), carry())),
            storeLow29(16),

            // Column 5: whatever is left above bit 29.
            c.i64_store32(get("r"), 20, carry())
        );
    }

    /**
     * Registers `<prefix>_mulnp3(x, y, r)`.
     *
     * Same column structure as `_mulu3`, but the words of `x` are
     * complemented on load (low word: xor 0x1FFFFFFF then +1, middle word:
     * xor 0x1FFFFFFF, top word: xor 0xFFFFFFFF), the first column has 1
     * subtracted, carries use an arithmetic shift, and every stored word is
     * complemented again. `buildMul` routes here when exactly one operand has
     * its sign bit set, passing that operand as `x`.
     */
    function buildMulNP3() {
        const f = module.addFunction(prefix+"_mulnp3");
        f.addParam("x", "i32");
        f.addParam("y", "i32");
        f.addParam("r", "i32");
        ["a0", "a1", "a2", "b0", "b1", "b2", "c"].forEach((name) => f.addLocal(name, "i64"));

        const c = f.getCodeBuilder();

        const get = (name) => c.getLocal(name);
        const set = (name, code) => c.setLocal(name, code);
        const mul = (a, b) => c.i64_mul(get(a), get(b));
        const word = (ptr, offset) => c.i64_load32_u(get(ptr), offset);
        const flipped = (ptr, offset, mask) => c.i64_xor(word(ptr, offset), c.i64_const(mask));
        const carry = () => c.i64_shr_s(get("c"), c.i64_const(29));
        // Stores the complement of the low 29 bits of the accumulator.
        const storeFlippedLow29 = (offset) => c.i64_store32(
            get("r"),
            offset,
            c.i64_xor(
                c.i64_and(get("c"), c.i64_const(0x1FFFFFFF)),
                c.i64_const(0x1FFFFFFF)
            )
        );

        f.addCode(
            // Column 0: (~x0 + 1) * y0 - 1
            set("a0", c.i64_add(flipped("x", 0, 0x1FFFFFFF), c.i64_const(1))),
            set("b0", word("y", 0)),
            set("c", c.i64_sub(mul("a0", "b0"), c.i64_const(1))),
            storeFlippedLow29(0),

            // Column 1
            set("a1", flipped("x", 4, 0x1FFFFFFF)),
            set("b1", word("y", 4)),
            set("c", c.i64_add(
                carry(),
                c.i64_add(mul("a0", "b1"), mul("a1", "b0"))
            )),
            storeFlippedLow29(4),

            // Column 2 (the top word of x is complemented over all 32 bits)
            set("a2", flipped("x", 8, 0xFFFFFFFF)),
            set("b2", word("y", 8)),
            set("c", c.i64_add(
                c.i64_add(mul("a0", "b2"), mul("a2", "b0")),
                c.i64_add(carry(), mul("a1", "b1"))
            )),
            storeFlippedLow29(8),

            // Column 3
            set("c", c.i64_add(
                c.i64_add(carry(), mul("a1", "b2")),
                mul("a2", "b1")
            )),
            storeFlippedLow29(12),

            // Column 4
            set("c", c.i64_add(mul("a2", "b2"), carry())),
            storeFlippedLow29(16),

            // Column 5: complemented final carry, full 32-bit mask.
            c.i64_store32(
                get("r"),
                20,
                c.i64_xor(carry(), c.i64_const(0xFFFFFFFF))
            )
        );
    }

    /**
     * Registers `<prefix>_mulnn3(x, y, r)`.
     *
     * Same column structure as `_mulu3`, but the words of BOTH operands are
     * complemented on load (low word: xor 0x1FFFFFFF then +1, middle word:
     * xor 0x1FFFFFFF, top word: xor 0xFFFFFFFF) and carries use an arithmetic
     * shift. Stored words are not complemented. `buildMul` routes here when
     * both operands have their sign bit set.
     */
    function buildMulNN3() {
        const f = module.addFunction(prefix+"_mulnn3");
        f.addParam("x", "i32");
        f.addParam("y", "i32");
        f.addParam("r", "i32");
        ["a0", "a1", "a2", "b0", "b1", "b2", "c"].forEach((name) => f.addLocal(name, "i64"));

        const c = f.getCodeBuilder();

        const get = (name) => c.getLocal(name);
        const set = (name, code) => c.setLocal(name, code);
        const mul = (a, b) => c.i64_mul(get(a), get(b));
        const flipped = (ptr, offset, mask) => c.i64_xor(
            c.i64_load32_u(get(ptr), offset),
            c.i64_const(mask)
        );
        const carry = () => c.i64_shr_s(get("c"), c.i64_const(29));
        const storeLow29 = (offset) => c.i64_store32(
            get("r"),
            offset,
            c.i64_and(get("c"), c.i64_const(0x1FFFFFFF))
        );

        f.addCode(
            // Column 0: (~x0 + 1) * (~y0 + 1)
            set("a0", c.i64_add(flipped("x", 0, 0x1FFFFFFF), c.i64_const(1))),
            set("b0", c.i64_add(flipped("y", 0, 0x1FFFFFFF), c.i64_const(1))),
            set("c", mul("a0", "b0")),
            storeLow29(0),

            // Column 1
            set("a1", flipped("x", 4, 0x1FFFFFFF)),
            set("b1", flipped("y", 4, 0x1FFFFFFF)),
            set("c", c.i64_add(
                carry(),
                c.i64_add(mul("a0", "b1"), mul("a1", "b0"))
            )),
            storeLow29(4),

            // Column 2 (top words are complemented over all 32 bits)
            set("a2", flipped("x", 8, 0xFFFFFFFF)),
            set("b2", flipped("y", 8, 0xFFFFFFFF)),
            set("c", c.i64_add(
                c.i64_add(mul("a0", "b2"), mul("a2", "b0")),
                c.i64_add(carry(), mul("a1", "b1"))
            )),
            storeLow29(8),

            // Column 3
            set("c", c.i64_add(
                c.i64_add(carry(), mul("a1", "b2")),
                mul("a2", "b1")
            )),
            storeLow29(12),

            // Column 4. The original passed a stray extra
            // c.i64_const(0x1FFFFFFF) argument to this store (left over from
            // the NP variant); it now matches the stores at 0/4/8/12.
            set("c", c.i64_add(mul("a2", "b2"), carry())),
            storeLow29(16),

            // Column 5: final carry, stored unmasked.
            c.i64_store32(get("r"), 20, carry())
        );
    }
function buildNeg(n) { const f = module.addFunction(prefix+"_neg"+n); f.addParam("x", "i32"); f.addParam("r", "i32"); f.addLocal("c", "i64"); const c = f.getCodeBuilder(); for (let i=0; i=0; i--) { if ( i==n-1) { f.addCode( c.setLocal( "c", c.i64_load32_s( c.getLocal("x"), i*4) ), c.setLocal( "sign", c.i64_shr_u( c.getLocal("c"), c.i64_const(63) ) ) ); } else { f.addCode( c.setLocal( "c", c.i64_or( c.i64_shl(c.getLocal("c"), c.i64_const(29)), c.i64_load32_u( c.getLocal("x"), i*4), ) ) ); } f.addCode( c.i64_store32( c.getLocal("r"), i*4, c.i64_and( c.i64_shr_s( c.getLocal("c"), c.i64_const(1) ), c.i64_const((i==n-1) ? 0xFFFFFFFF : 0x1FFFFFFF) ) ) ); } f.addCode( c.if( c.i32_wrap_i64( c.i64_and( c.getLocal("c"), c.getLocal("sign") ) ), c.call(prefix + "_addOne" + n , c.getLocal("r")) ) ); } function buildDivShort(n) { buildDivShortRaw(n, "pp"); buildDivShortRaw(n, "pn"); buildDivShortRaw(n, "np"); buildDivShortRaw(n, "nn"); let c; const fxp = module.addFunction(prefix+"_divshortxp"+n); c = fxp.getCodeBuilder(); fxp.addParam("x", "i32"); fxp.addParam("s", "i32"); fxp.addParam("r", "i32"); fxp.addCode( c.if( c.i32_and( c.i32_load( c.getLocal("x"), (n-1)*4 ), c.i32_const(0x80000000) ), [ ...c.call(prefix + "_divshortnp" + n, c.getLocal("x"), c.getLocal("s"), c.getLocal("r")), ], [ ...c.call(prefix + "_divshortpp" + n, c.getLocal("x"), c.getLocal("s"), c.getLocal("r")), ] ), ); const fxn = module.addFunction(prefix+"_divshortxn"+n); c = fxn.getCodeBuilder(); fxn.addParam("x", "i32"); fxn.addParam("s", "i32"); fxn.addParam("r", "i32"); fxn.addCode( c.if( c.i32_and( c.i32_load( c.getLocal("x"), (n-1)*4 ), c.i32_const(0x80000000) ), [ ...c.call(prefix + "_divshortnn" + n, c.getLocal("x"), c.getLocal("s"), c.getLocal("r")), ], [ ...c.call(prefix + "_divshortpn" + n, c.getLocal("x"), c.getLocal("s"), c.getLocal("r")), ] ), ); const f = module.addFunction(prefix+"_divshort"+n); c = f.getCodeBuilder(); f.addParam("x", "i32"); f.addParam("s", "i32"); f.addParam("r", "i32"); 
f.addCode( c.if( c.i32_lt_s( c.getLocal("s"), c.i32_const(0) ), [ ...c.call(prefix + "_divshortxn" + n, c.getLocal("x"), c.getLocal("s"), c.getLocal("r")), ], [ ...c.call(prefix + "_divshortxp" + n, c.getLocal("x"), c.getLocal("s"), c.getLocal("r")), ] ), ); // signs = "pp", "np", "pn", "nn" function buildDivShortRaw(n, signs) { const f = module.addFunction(prefix+"_divshort"+signs+n); f.addParam("x", "i32"); f.addParam("s32", "i32"); f.addParam("r", "i32"); f.addLocal("c", "i64"); f.addLocal("s", "i64"); const c = f.getCodeBuilder(); if (signs[1] == "n") { f.addCode(c.setLocal("s", c.i64_sub(c.i64_const(0), c.getLocal("s")))); } f.addCode( c.setLocal("s", c.i64_extend_i32_u(c.getLocal("s32"))), c.setLocal( "c", signs[0] == "p" ? c.i64_load32_u( c.getLocal("x"), (n-1)*4) : c.i64_xor( c.i64_load32_u( c.getLocal("x"), (n-1)*4), c.i64_const(0xFFFFFFFF) ) ), c.i64_store32( c.getLocal("r"), (n-1)*4, (signs == "pp") || (signs == "nn") ? c.i64_div_u( c.getLocal("c"), c.getLocal("s") ) : c.i64_xor( c.i64_div_u( c.getLocal("c"), c.getLocal("s") ), c.i64_const(0xFFFFFFFF) ) ) ); for (let i=n-2; i>=0; i--) { f.addCode( c.setLocal( "c", c.i64_or( c.i64_shl( c.i64_rem_u( c.getLocal("c"), c.getLocal("s") ), c.i64_const(29) ), signs[0] == "p" ? c.i64_load32_u( c.getLocal("x"), i*4) : c.i64_xor( c.i64_load32_u( c.getLocal("x"), i*4), c.i64_const(0x1FFFFFFF) ) ) ) ); if ((i==0) && (signs[0] == "n")) { f.addCode( c.setLocal( "c", c.i64_add( c.getLocal("c"), c.i64_const(1) ) ) ); } f.addCode( c.i64_store32( c.getLocal("r"), i*4, (signs == "pp") || (signs == "nn") ? 
c.i64_div_u( c.getLocal("c"), c.getLocal("s") ) : c.i64_xor( c.i64_div_u( c.getLocal("c"), c.getLocal("s") ), c.i64_const(0x1FFFFFFF) ) ) ); } if ((signs == "pn") || (signs == "np")) { f.addCode( c.call(prefix + "_addOne" + n , c.getLocal("r")) ); } } } function buildRecompose(n) { const sn = n/3; const f = module.addFunction(prefix+"_recompose"+n); f.addParam("s", "i32"); f.addParam("r", "i32"); f.addLocal("c", "i64"); const c = f.getCodeBuilder(); /* 0 sn sn*2 sn*3 sn*4 sn*5 0 sn sn*2 sn*3 sn*4 sn*5 sn+6 sn*7 sn*8 sn*9 b= Math.floor(i/sn) b*2*sn + (i-b*sn) b*2*sn + (i-b*sn) - sn */ for (let i=0; i